# ENSEMBL DATABASE
# v102 is for the latest mm10 version
# ensembl.v102 <- useMart(host = "https://nov2020.archive.ensembl.org",
# biomart = "ENSEMBL_MART_ENSEMBL",
# dataset = "mmusculus_gene_ensembl")
palette_1 = list(red = "#E9002D",
amber = "#FFAA00",
green = "#00B000")
palette_2 = list(red = "#FF1F5B",
green = "#00CD6C",
blue = "#009ADE",
purple = "#AF58BA",
yellow = "#FFC61E",
orange = "#F28522",
grey = "#A0B1BA",
brown = "#A6761D")
palette_3 = list(grey1 = "#a0b1ba",
grey2 = "#c5d0d5",
grey3 = "#eceff1")
colorListLoop <- c(palette_3[["grey3"]], palette_3[["grey2"]], palette_3[["grey1"]],
"#C5E1EF", "#6CB0D6", "#226E9C",
"#06592A",
"#FED976", "#FD8D3C", "#E31A1C")
colorListPromoter <- c(palette_3[["grey3"]],"#E88587", "#E31A1C")
colorListEnhancer <- c(palette_3[["grey3"]],"#87AFC7", "#226E9C")
colorListStructure <- c(palette_3[["grey3"]], palette_3[["grey2"]], palette_3[["grey1"]])
#
# loopDir <- here("../..", "result", "loop", "chromo_detect")
# scoreDir <- here("../..", "result", "loop", "chromo_quantify")
# consensusDir <- here("../..", "result", "loop", "consensus")
# figDir <- here("../..", "figure", "loop")
# refDir <- here("../..", "reference")
library(colorspace)
fontType <- "Helvetica"
fontSizeL <- 10 # pt
fontSizeM <- 8
fontSizeS <- 6
lineThick <- 0.75 # pt
lineMedium <- 0.5
lineThin <- 0.25
panelUnit <- 30 # mm
panelMargin <- 1.5
mmToInch <- 0.03937007874
mmToLineUnit <- 1/2.13
mmToLinePlotgarden <- 1/0.75
ptToMM <- 1/2.845
strong_red <- "#CB333A"
strong_blue <- "#4851A0"
weak_red <- lighten(strong_red, amount = 0.4) # FF7D81
weak_blue <- lighten(strong_blue, amount = 0.4) # 8A91DD
no_grey <- "#A8A8A8"
strong_teel <- "#0892A5"
strong_green <- "#23CE6B" # A485
strong_darkgreen <- "#054A29"
strong_yellow <- "#FFBA49"
strong_orange <- "#F18F01" # dTAG
strong_lightpurple <- "#BD93D8"
strong_purple <- "#9E33CB" # Epi
panelSize <- function(num, unit = panelUnit, margin = panelMargin){
return(num*unit - 2*margin)
}
importBedpe = function(bedpe){
a1 = makeGRangesFromDataFrame(data.frame(
chr = bedpe$V1,
start = bedpe$V2 +1,
end = bedpe$V3))
a2 = makeGRangesFromDataFrame(data.frame(
chr = bedpe$V4,
start = bedpe$V5 +1,
end = bedpe$V6))
GInteractions(a1, a2)
}
get_density <- function(x, y, ...) {
dens <- MASS::kde2d(x, y, ...)
ix <- findInterval(x, dens$x)
iy <- findInterval(y, dens$y)
ii <- cbind(ix, iy)
return(dens$z[ii])
}
label_kb_mb <- function(x) {
ifelse(x >= 1000000, paste0(x / 1000000, "Mb"), paste0(x / 1000, "kb"))
}
importPeak = function(fileName){
df = fread(fileName)
gr = makeGRangesFromDataFrame(data.frame(
chr = df$V1, start = df$V2, end = df$V3
))
}
diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
dplyr::select(id, UT, AID)
score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
dplyr::mutate(diff_G1 = dTAG - DMSO,
diff_async = AID - UT)
### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-1, 0.5) + ylim(-1, 0.5) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3,
p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3,
p4, p5, p6, align = "h", ncol = 3))
dev.off()
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(res = V3 - V2,
id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id
diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
dplyr::select(id, UT, AID)
score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
dplyr::mutate(diff_G1 = dTAG - DMSO,
diff_async = AID - UT) %>%
dplyr::filter(id %in% regID)
### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-1, 0.5) + ylim(-1, 0.5) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_reg.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3,
p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_reg.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3,
p4, p5, p6, align = "h", ncol = 3))
dev.off()
########## Saving Async and G1 specific regulatory loop
diffCutoff <- 0.2
temp <- score.tb %>% dplyr::filter(diff_G1 >= -diffCutoff, diff_async >= -diffCutoff) %>%
dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothRetained.bedpe"), sep = "\t", col.names = FALSE)
temp <- score.tb %>% dplyr::filter(diff_G1 >= -diffCutoff, diff_async < -diffCutoff) %>%
dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_AsyncSpecificPert.bedpe"), sep = "\t", col.names = FALSE)
temp <- score.tb %>% dplyr::filter(diff_G1 < -diffCutoff, diff_async >= -diffCutoff) %>%
dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_G1SpecificPert.bedpe"), sep = "\t", col.names = FALSE)
temp <- score.tb %>% dplyr::filter(diff_G1 < -diffCutoff, diff_async < -diffCutoff) %>%
dplyr::select(seq(1, 7))
fwrite(temp, here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothPert.bedpe"), sep = "\t", col.names = FALSE)
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(res = V3 - V2,
id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id
diffCutoff = 0.2
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- fread(here(consensusDir, "chromo_cons_score_async.tsv")) %>%
dplyr::select(id, UT, AID)
score.tb <- data %>% dplyr::full_join(temp, by = c("id")) %>%
dplyr::mutate(diff_G1 = dTAG - DMSO,
diff_async = AID - UT) %>%
dplyr::filter(id %in% regID)
### P1. UT vs AID
score.tb$density <- get_density(score.tb$UT, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$UT, score.tb$AID)
p1 <- ggplot(score.tb, aes(x = UT, y = AID, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P1. DMSO vs dTAG
score.tb$density <- get_density(score.tb$DMSO, score.tb$dTAG, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$dTAG)
p2 <- ggplot(score.tb, aes(x = DMSO, y = dTAG, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. DMSO vs A485
score.tb$density <- get_density(score.tb$DMSO, score.tb$A485, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$A485)
p3 <- ggplot(score.tb, aes(x = DMSO, y = A485, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. DMSO vs UT
score.tb$density <- get_density(score.tb$DMSO, score.tb$UT, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$DMSO, score.tb$UT)
p4 <- ggplot(score.tb, aes(x = DMSO, y = UT, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. dTAG vs AID
score.tb$density <- get_density(score.tb$dTAG, score.tb$AID, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$dTAG, score.tb$AID)
p5 <- ggplot(score.tb, aes(x = dTAG, y = AID, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
### P3. diff
score.tb$density <- get_density(score.tb$diff_G1, score.tb$diff_async, n = 100)
score.tb <- score.tb %>% dplyr::arrange(density)
correlation <- cor(score.tb$diff_G1, score.tb$diff_async)
p6 <- ggplot(score.tb, aes(x = diff_G1, y = diff_async, color = density)) +
geom_point() +
scale_color_viridis() +
xlim(-1, 0.5) + ylim(-1, 0.5) +
coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
geom_hline(yintercept = diffCutoff, alpha = 0.5, color = "grey") +
geom_hline(yintercept = -diffCutoff, alpha = 0.5, color = "grey") +
geom_vline(xintercept = diffCutoff, alpha = 0.5, color = "grey") +
geom_vline(xintercept = -diffCutoff, alpha = 0.5, color = "grey") +
theme_classic() + ggtitle(paste0("Consensus loop score")) +
annotate("text", x = -1, y = 0.5, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
png(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_str.png")), res = 600, units = "in", width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3,
p4, p5, p6, align = "h", ncol = 3))
dev.off()
svglite(here(figDir, paste0("consensus_score_scatterplot_pu100pz100_G1vsAsync_str.svg")), width = 5*2.5, height = 2.5*2.5)
print(cowplot::plot_grid(p1, p2, p3,
p4, p5, p6, align = "h", ncol = 3))
dev.off()
# Importing loops. For make comparison easier, 25 kb
binSize = 25*1000
temp <- fread(here(refDir, "Tjian_chromosight_loop.bedpe")) %>%
dplyr::mutate(center1 = 0.5*(V2 + V3),
center2 = 0.5*(V5 + V6),
start1 = center1 - 0.5*binSize,
end1 = center1 + 0.5*binSize,
start2 = center2 - 0.5*binSize,
end2 = center2 + 0.5*binSize) %>%
dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.async <- importBedpe(temp)
binSize = 25*1000
temp <- fread(here(consensusDir, "chromo_cons.bedpe")) %>%
dplyr::mutate(center1 = 0.5*(V2 + V3),
center2 = 0.5*(V5 + V6),
start1 = center1 - 0.5*binSize,
end1 = center1 + 0.5*binSize,
start2 = center2 - 0.5*binSize,
end2 = center2 + 0.5*binSize) %>%
dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.G1 <- importBedpe(temp)
# Check overlap
overlap <- findOverlaps(loop.async, loop.G1)
n.async <- nrow(as_tibble(loop.async))
n.G1 <- nrow(as_tibble(loop.G1))
n.async.overlap <- length(unique(queryHits(overlap)))
n.G1.overlap <- length(unique(subjectHits(overlap)))
plot(euler(c("Async" = n.async - n.async.overlap,
"Async&G1" = n.async.overlap,
"G1" = 1)), quantities = TRUE)
plot(euler(c("Async" =1,
"Async&G1" = n.G1.overlap,
"G1" = n.G1 - n.G1.overlap)), quantities = TRUE)
# Importing loops. For make comparison easier, 25 kb
binSize = 25*1000
temp.pp <- fread(here(refDir, "Tjian_chromosight_loop_P-P.bedpe")) %>%
dplyr::mutate(center1 = 0.5*(V2 + V3),
center2 = 0.5*(V5 + V6),
start1 = center1 - 0.5*binSize,
end1 = center1 + 0.5*binSize,
start2 = center2 - 0.5*binSize,
end2 = center2 + 0.5*binSize) %>%
dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp.pp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
binSize = 25*1000
temp.ep <- fread(here(refDir, "Tjian_chromosight_loop_E-P.bedpe")) %>%
dplyr::mutate(center1 = 0.5*(V2 + V3),
center2 = 0.5*(V5 + V6),
start1 = center1 - 0.5*binSize,
end1 = center1 + 0.5*binSize,
start2 = center2 - 0.5*binSize,
end2 = center2 + 0.5*binSize) %>%
dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp.ep) <- c("V1", "V2", "V3", "V4", "V5", "V6")
temp <- bind_rows(temp.pp, temp.ep)
loop.async <- importBedpe(temp)
binSize = 25*1000
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(center1 = 0.5*(V2 + V3),
center2 = 0.5*(V5 + V6),
start1 = center1 - 0.5*binSize,
end1 = center1 + 0.5*binSize,
start2 = center2 - 0.5*binSize,
end2 = center2 + 0.5*binSize) %>%
dplyr::select(V1, start1, end1, V4, start2, end2)
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loop.G1 <- importBedpe(temp)
# Check overlap
overlap <- findOverlaps(loop.async, loop.G1)
n.async <- nrow(as_tibble(loop.async))
n.G1 <- nrow(as_tibble(loop.G1))
n.async.overlap <- length(unique(queryHits(overlap)))
n.G1.overlap <- length(unique(subjectHits(overlap)))
plot(euler(c("Async" = n.async - n.async.overlap,
"Async&G1" = n.async.overlap,
"G1" = 1)), quantities = TRUE)
plot(euler(c("Async" =0.1,
"Async&G1" = n.G1.overlap,
"G1" = n.G1 - n.G1.overlap)), quantities = TRUE)
#### Importing ChIP-exo peaks
refDir <- here("../..", "reference")
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K4me3 <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
peak.Whyte.SE <- importPeak(here(refDir, "superEnhancer_Whyte_ESC_mm10.bed"))
peak.Dylan.SE <- importPeak(here(refDir, "superEnhancer_Dylan_ESC.bed"))
createLoopAnnotation <- function(bedpe.loop.anno, name, figDir, outDir, colorList){
temp = bedpe.loop.anno %>%
dplyr::mutate(sample = name)
num = nrow(temp)
p7 = ggplot(temp, aes(x = sample, fill = Anno2)) +
geom_bar(color = "black") +
theme_bw() +
labs(title = paste0(num, " loops"),
x = "", y = "Counts") +
scale_y_continuous(labels = comma_format()) +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 5,
legend.position = "right",
legend.direction = "vertical") +
scale_fill_manual(values = colorList)
width = 3
height = 5
svglite(here(figDir,
paste0("loopClassify_", name, ".svg")),
width = width, height = height)
plot(p7)
invisible(dev.off())
png(here(figDir,
paste0("loopClassify_", name, ".png")),
width = width, height = height, res = 600, units = "in")
plot(p7)
invisible(dev.off())
}
annotateLoopRelaxedTSS <- function(bedpe.anno){
temp = bedpe.anno %>% dplyr::rowwise() %>%
dplyr::mutate(
A1 = if_else((A1_H3K4me3TSS), "P",
if_else((A1_H3K27ac), "E",
if_else((A1_CTCF|A1_RAD21), "S", "X"))),
A2 = if_else((A2_H3K4me3TSS), "P",
if_else((A2_H3K27ac), "E",
if_else((A2_CTCF|A2_RAD21), "S", "X")))
)
temp = temp %>% dplyr::rowwise() %>%
dplyr::mutate(Anno = paste0(A1, "-", A2),
Anno2 = if_else(Anno == "E-P", "P-E",
if_else(Anno == "S-P", "P-S",
if_else(Anno == "X-P", "P-X",
if_else(Anno == "S-E", "E-S",
if_else(Anno == "X-E", "E-X",
if_else(Anno == "X-S", "S-X",
Anno)))))),
)
temp$Anno2 = factor(temp$Anno2, level = c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"))
# Checking the precense of super enhancer
temp <- temp %>% dplyr::rowwise() %>%
dplyr::mutate(AnnoSE = ifelse(A1_Whyte.SE | A2_Whyte.SE, "SE", "NO"))
temp$AnnoSE <- factor(temp$AnnoSE, level = c("SE", "NO"))
return(temp)
}
annotateLoopPromoterTSS <- function(bedpe.anno){
temp = bedpe.anno %>% dplyr::rowwise() %>%
dplyr::mutate(
A1 = if_else((A1_H3K4me3TSS), "P", "N"),
A2 = if_else((A2_H3K4me3TSS), "P", "N")
)
temp = temp %>% dplyr::rowwise() %>%
dplyr::mutate(Anno = paste0(A1, "-", A2),
Anno2 = if_else(Anno == "N-P", "P-N", Anno))
temp$Anno2 = factor(temp$Anno2, level = c("N-N", "P-N", "P-P"))
return(temp)
}
annotateLoopEnhancer <- function(bedpe.anno){
temp = bedpe.anno %>% dplyr::rowwise() %>%
dplyr::mutate(
A1 = if_else((A1_H3K27ac), "E", "N"),
A2 = if_else((A2_H3K27ac), "E", "N")
)
temp = temp %>% dplyr::rowwise() %>%
dplyr::mutate(Anno = paste0(A1, "-", A2),
Anno2 = if_else(Anno == "N-E", "E-N", Anno))
temp$Anno2 = factor(temp$Anno2, level = c("N-N", "E-N", "E-E"))
return(temp)
}
annotateLoopStructure <- function(bedpe.anno){
temp = bedpe.anno %>% dplyr::rowwise() %>%
dplyr::mutate(
A1 = if_else((A1_CTCF|A1_RAD21), "S", "N"),
A2 = if_else((A2_CTCF|A2_RAD21), "S", "N")
)
temp = temp %>% dplyr::rowwise() %>%
dplyr::mutate(Anno = paste0(A1, "-", A2),
Anno2 = if_else(Anno == "N-S", "S-N", Anno))
temp$Anno2 = factor(temp$Anno2, level = c("N-N", "S-N", "S-S"))
return(temp)
}
annotateAnchorTSS <- function(bedpe){
tb.loop =
setOverlapColumn("Whyte.SE",
setOverlapColumn("CTCF",
setOverlapColumn("RAD21",
setOverlapColumn("H3K27ac",
setOverlapColumn("H3K4me3TSS", bedpe)))))
return(tb.loop)
}
setOverlapColumn <- function(peakName, loop){
tb.loop = as_tibble(loop)
overlap = returnOverlapIndexLixt(get(paste0("peak.", peakName)), tb.loop)
tb.loop[[paste0("A1_", peakName)]] = FALSE
tb.loop[[paste0("A1_", peakName)]][overlap[[1]]] = TRUE
tb.loop[[paste0("A2_", peakName)]] = FALSE
tb.loop[[paste0("A2_", peakName)]][overlap[[2]]] = TRUE
return(tb.loop)
}
returnOverlapIndexLixt <- function(peak, loop){
anchor1.tb = as_tibble(loop) %>% dplyr::select(chrom1, start1, end1)
anchor1 = makeGRangesFromDataFrame(data.frame(
chr = anchor1.tb$chrom1,
start = anchor1.tb$start1,
end = anchor1.tb$end1
))
anchor2.tb = as_tibble(loop) %>% dplyr::select(chrom2, start2, end2)
anchor2 = makeGRangesFromDataFrame(data.frame(
chr = anchor2.tb$chrom2,
start = anchor2.tb$start2,
end = anchor2.tb$end2
))
overlap = list(overlap1 = unique(queryHits(findOverlaps(anchor1, peak))),
overlap2 = unique(queryHits(findOverlaps(anchor2, peak))))
return(overlap)
}
saveAnnoGroupBedpe <- function(temp, anno.list, name, annoName, ouDir){
loop = temp %>% dplyr::filter(Anno2 %in% anno.list) %>%
dplyr::select(c("chrom1", "start1", "end1", "chrom2", "start2", "end2"))
fwrite(loop, here(outDir, paste0(name, "_", annoName, ".bedpe")), sep = "\t", col.names = FALSE)
}
consensus.loop.tb <- fread(here(consensusDir, "chromo_cons_score.tsv"))
################################################################################
# Filtering H3K4me3 peaks that has TSS nearby
# Since the finest resolution is 5kb, +-2.5kb will be used as a cutoff for checking TSS presence
flankSize <- 2500
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend)
colnames(gene.tb) <- c("chr", "start", "end")
TSS1kb.gr <- makeGRangesFromDataFrame(gene.tb)
temp <- peak.H3K4me3[unique(queryHits(findOverlaps(peak.H3K4me3, TSS1kb.gr)))]
fwrite(as_tibble(temp), here(refDir,
paste0("33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.",
flankSize/1000, "kbTSS.bed")), sep = "\t", col.names = FALSE)
peak.H3K4me3TSS <- importPeak(here(refDir, paste0("33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.",
flankSize/1000, "kbTSS.bed")))
################################################################################
# Annotating with strict priority (P-TSS > E > S)
temp.anno.TSS <- annotateAnchorTSS(consensus.loop.tb) %>%
dplyr::mutate(
diff_dTAG_DMSO = (dTAG-DMSO),
diff_A485_DMSO = (A485-DMSO))
name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- annotateLoopRelaxedTSS(temp.anno.TSS)
fwrite(consensus.loop.anno.tb, here(consensusDir, paste0(name, ".tsv")),
sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.tb, name, figDir, outDir, colorListLoop)
saveAnnoGroupBedpe(consensus.loop.anno.tb, unique(consensus.loop.anno.tb$Anno2) , name, "all", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"), name, "regulatory", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("S-S", "S-X"), name, "structure", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("X-X"), name, "x-x", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "P-S", "P-X"), name, "p-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E", "E-E"), name, "pe-pe", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P", "P-E"), name, "p-pe", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-P"), name, "p-p", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-E"), name, "p-e", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-S"), name, "p-s", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.tb, c("P-X"), name, "p-x", outDir)
################################################################################
# Annotating with one marker (Promoter)
name <- "chromo_cons_annoPromoter"
consensus.loop.anno.promoter.tb <- annotateLoopPromoterTSS(temp.anno.TSS)
fwrite(consensus.loop.anno.promoter.tb, here(consensusDir, paste0(name, ".tsv")),
sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.promoter.tb, name,
figDir, outDir, colorListPromoter)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, unique(consensus.loop.anno.promoter.tb$Anno2) , name, "all", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("P-P"), name, "p-p", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("P-N"), name, "p-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.promoter.tb, c("N-N"), name, "n-n", outDir)
################################################################################
# Annotating with one marker (Enhancer)
name <- "chromo_cons_annoEnhancer"
consensus.loop.anno.enhancer.tb <- annotateLoopEnhancer(temp.anno.TSS)
fwrite(consensus.loop.anno.enhancer.tb, here(consensusDir, paste0(name, ".tsv")),
sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.enhancer.tb, name,
figDir, outDir, colorListEnhancer)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, unique(consensus.loop.anno.enhancer.tb$Anno2) , name, "all", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("E-E"), name, "e-e", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("E-N"), name, "e-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.enhancer.tb, c("N-N"), name, "n-n", outDir)
################################################################################
# Annotating with one marker (Structure)
name <- "chromo_cons_annoStructure"
consensus.loop.anno.structure.tb <- annotateLoopStructure(temp.anno.TSS)
fwrite(consensus.loop.anno.structure.tb, here(consensusDir, paste0(name, ".tsv")),
sep = "\t", col.names = TRUE)
createLoopAnnotation(consensus.loop.anno.structure.tb, name,
figDir, outDir, colorListStructure)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, unique(consensus.loop.anno.structure.tb$Anno2) , name, "all", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("S-S"), name, "s-s", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("S-N"), name, "s-n", outDir)
saveAnnoGroupBedpe(consensus.loop.anno.structure.tb, c("N-N"), name, "n-n", outDir)
################################################################################
# 2024.09.10 Splitting regulatory loop into pure regulatory and structure-related loops
name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- fread(here(consensusDir, paste0(name, ".tsv")))
regulatory.loop.anno.tb <- consensus.loop.anno.tb %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
regulatory.loop.anno.tb <- regulatory.loop.anno.tb %>% rowwise() %>%
dplyr::mutate(
Anno3 = ifelse(A1_RAD21 | A1_CTCF | A2_RAD21 | A2_RAD21, paste0("str"),
paste0("reg"))
)
fwrite(regulatory.loop.anno.tb, here(consensusDir, paste0(name, "_Anno3.tsv")),
sep = "\t", col.names = TRUE)
regulatory.loop.anno.tb_reg <- regulatory.loop.anno.tb %>% dplyr::filter(Anno3 == "reg")
regulatory.loop.anno.tb_str <- regulatory.loop.anno.tb %>% dplyr::filter(Anno3 == "str")
saveAnnoGroupBedpe(regulatory.loop.anno.tb_reg, c("P-P", "P-E", "E-E"), name, "pe-pe_reg", outDir)
saveAnnoGroupBedpe(regulatory.loop.anno.tb_str, c("P-P", "P-E", "E-E"), name, "pe-pe_str", outDir)
create_loop_dis_vs_score <- function(data, figDir, name, Anno2List){
data = data %>% dplyr::filter(Anno2 %in% Anno2List)
### barplot
temp <- data %>% dplyr::select(id, DMSO, dTAG, A485) %>%
pivot_longer(!id, names_to = "treatment", values_to = "score")
temp$treatment <- factor(temp$treatment, levels = c("DMSO", "dTAG", "A485"))
p3 <- ggplot(temp, aes(x = treatment, y = score)) +
geom_violin(aes(fill = treatment), show.legend = FALSE) +
scale_fill_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
ylim(-0.5, 1) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
theme_classic() +
ggtitle(name) +
theme(plot.title = element_text(size = 4))
fileName <- paste0("score_barplot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 2, height = 4)
print(p3)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 2, height = 4)
print(p3)
dev.off()
### Distance vs score
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, dTAG, A485)
avg_scores <- temp %>%
group_by(distance) %>%
summarise(across(starts_with("DMSO"):starts_with("A485"), mean, na.rm = TRUE))
avg_scores_long <- avg_scores %>%
pivot_longer(cols = DMSO:A485, names_to = "condition", values_to = "avg_score")
avg_scores_long$condition <- factor(avg_scores_long$condition, levels = c("DMSO", "dTAG", "A485"))
# Create the plot
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
geom_smooth(show.legend = TRUE) + ylim(0, 0.5) +
theme_classic() + scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
scale_fill_manual(values = c("DMSO" = "grey80", "dTAG" = "pink", "A485" = "skyblue")) +
labs(title = paste0(name),
x = "Distance",
y = "Average Score") +
theme(plot.title = element_text(size = 5))
fileName <- paste0("dist_vs_score_linePlot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 4, height = 3)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 4, height = 3)
print(p4)
dev.off()
}
create_loop_dis_vs_diffscore <- function(data, figDir, name, Anno2List){
data = data %>% dplyr::filter(Anno2 %in% Anno2List)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, diff_A485_DMSO)
avg_scores <- temp %>%
group_by(distance) %>%
summarise(across(1:2, mean, na.rm = TRUE))
avg_scores_long <- avg_scores %>%
pivot_longer(cols = 2:3, names_to = "condition", values_to = "avg_score")
avg_scores_long$condition <- factor(avg_scores_long$condition,
levels = c("diff_dTAG_DMSO", "diff_A485_DMSO"))
# Create the plot
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
geom_smooth(show.legend = TRUE) + geom_hline(yintercept = 0) + ylim(-0.5, 0.1) +
theme_classic() + scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = c("diff_dTAG_DMSO" = "pink", "diff_A485_DMSO" = "skyblue")) +
scale_fill_manual(values = c("diff_dTAG_DMSO" = "pink", "diff_A485_DMSO" = "skyblue")) +
labs(title = paste0(name),
x = "Distance",
y = "Average Diff Score") +
theme(plot.title = element_text(size = 5))
fileName <- paste0("dist_vs_score_difflinePlot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 4.5, height = 3)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 4.5, height = 3)
print(p4)
dev.off()
}
create_loop_scatterplot <- function(data, figDir, name, Anno2List, diffCutoff){
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
dplyr::filter(Anno2 %in% Anno2List)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
### Scatterplot
data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
correlation <- cor(data$DMSO, data$dTAG)
p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
geom_point(size = 1,
alpha = 1,
stroke = 0) +
scale_color_viridis(option = "D", guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)) +
xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, , size = 1,
family = fontType) +
annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, , size = 1,
family = fontType) +
annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, , size = 1,
family = fontType) +
theme_classic() +
#annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black") +
theme(
# legend.position = "none",
plot.title = element_text(
hjust = 0.5,
size = fontSizeS,
family = fontType
),
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
) +
xlab("Loop score\nG1.DMSO") +
ylab("Loop score\nG1.dTAG")
num.up <- (summary(data$updown_A485_DMSO))["UP"]
num.no <- (summary(data$updown_A485_DMSO))["NO"]
num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
data$density <- get_density(data$DMSO, data$A485, n = 100)
data <- data %>% dplyr::arrange(density)
correlation <- cor(data$DMSO, data$A485)
p2 <- ggplot(data, aes(x = DMSO, y = A485, color = density)) +
geom_point(size = 1,
alpha = 1,
stroke = 0) +
scale_color_viridis(option = "D", guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)) +
xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey",
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, size = 1,
family = fontType) +
annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, size = 1,
family = fontType) +
annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, size = 1,
family = fontType) +
theme_classic() +
# annotate("text", x = -0.5, y = 1, label = paste("r =", round(correlation, 2)), size = 5, color = "black") +
theme(
# legend.position = "none",
plot.title = element_text(
hjust = 0.5,
size = fontSizeS,
family = fontType
),
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
) +
xlab("Loop score\nG1.DMSO") +
ylab("Loop score\nG1.A485")
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- here(figDir,paste0("scatterplot_", name, "_dTAG_vs_DMSO_", diffCutoff))
svglite(paste0(fileName, ".svg"), width = width, height =height)
print(p1)
dev.off()
png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
print(p1)
dev.off()
fileName <- here(figDir, paste0("scatterplot_", name, "_A485_vs_DMSO_", diffCutoff))
svglite(paste0(fileName, ".svg"), width = width, height =height)
print(p2)
dev.off()
png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
print(p2)
dev.off()
#
#
#
# png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
# print(cowplot::plot_grid(p1, p2, align = "h"))
# dev.off()
#
# svglite(here(figDir, paste0(fileName, ".svg")),
# width = 5*1.5, height = 2.5*1.5)
# print(cowplot::plot_grid(p1, p2, align = "h"))
# dev.off()
}
make_diff_bedpe <- function(data, name, Anno2List, outDir, diffCutoff){
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
dplyr::filter(Anno2 %in% Anno2List)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_UP_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_NO_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_DOWN_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_UP_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_NO_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_DOWN_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
}
create_dist_vs_avgScore_perTreatment <- function(data, figDir, name, loopList, colorList, se = FALSE){
avg_scores_long <- data %>%
group_by(distance, Anno2) %>%
summarise(avg_score = mean(score, na.rm = TRUE)) %>%
ungroup()
avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) +
geom_smooth(show.legend = TRUE, se = se) +
ylim(0, 0.5) +
theme_classic() + scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = colorList) +
scale_fill_manual(values = colorList) +
labs(title = paste0(name),
x = "Distance",
y = "Average Score") +
theme(plot.title = element_text(size = 8))
fileName <- paste0("dist_vs_score_linePlot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 4, height = 3)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 4, height = 3)
print(p4)
dev.off()
}
create_dist_vs_avgDiffScore_perTreatment <- function(data, figDir, name, loopList, colorList, se = FALSE){
avg_scores_long <- data %>%
group_by(distance, Anno2) %>%
summarise(avg_score = mean(score, na.rm = TRUE)) %>%
ungroup()
avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) +
geom_hline(yintercept = 0) +
geom_smooth(show.legend = TRUE, se = se) +
theme_classic() + scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = colorList) +
scale_fill_manual(values = colorList) +
labs(title = paste0(name),
x = "Distance",
y = "Average Diff Score") +
theme(plot.title = element_text(size = 8))
fileName <- paste0("dist_vs_score_difflinePlot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 4, height = 3)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 4, height = 3)
print(p4)
dev.off()
}
create_score_barplot_perTreatment <- function(data, figDir, name, loopList, colorList){
data$Anno2 <- factor(data$Anno2, levels = loopList)
p3 <- ggplot(data, aes(x = Anno2, y = score)) +
geom_violin(aes(fill = Anno2), show.legend = FALSE) +
scale_fill_manual(values = colorList) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
ylim(-0.5, 1) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
theme_classic() +
ggtitle(name) +
theme(plot.title = element_text(size = 4))
fileName <- paste0("score_barPlot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 2, height = 4)
print(p3)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 2, height = 4)
print(p3)
}
iterate_loop_functions <- function(data, figDir, groupName, annoList){
# create_loop_dis_vs_score(data, figDir, groupName, annoList)
# create_loop_dis_vs_diffscore(data, figDir, groupName, annoList)
create_loop_scatterplot(data, figDir, groupName, annoList, 0.2)
# create_loop_scatterplot(data, figDir, groupName, annoList, 0.1)
# make_diff_bedpe(data, groupName, annoList, consensusDir, 0.2)
# make_diff_bedpe(data, groupName, annoList, consensusDir, 0.1)
}
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_structure"),
c("S-S", "S-X"))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-s"),
c("S-S"))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_regulatory"),
c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"))
iterate_loop_functions(data, figDir, paste0(name, "_", "cl_pe-pe"),
c("P-P", "P-E", "E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-pe"),
c("P-P", "P-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-p"),
c("P-P"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-e"),
c("P-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-e"),
c("E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "x-x"),
c("X-X"))
###########
# Creating differential scatterplot
View(data)
#######################
# Creating figures per each condition
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"), c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"), colorListLoop)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"), colorListLoop)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"), c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"), colorListLoop)
#######################
# Creating figures per each condition, differential
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"), c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"), colorListLoop)
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG_SE"), c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"), colorListLoop, se = TRUE)
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"), c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"), colorListLoop)
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485_SE"), c("X-X",
"S-X", "S-S",
"E-X","E-S","E-E",
"P-E","P-X", "P-S", "P-P"), colorListLoop, se = TRUE)
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, "_Anno3.tsv")))
diffCutoff <- 0.2
# Splitting data
data.reg <- data %>% dplyr::filter(Anno3 == "reg")
data.str <- data %>% dplyr::filter(Anno3 == "str")
# Taking codes from previous function (str)
data <- data.str
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, size = 3) +
theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))
# Taking codes from previous function (reg)
data <- data.reg
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p2 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, size = 3) +
theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))
fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_pure_str_reg")
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, "_Anno3.tsv")))
data <- data %>% dplyr::mutate(size = start2 - start1)
data_under50kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size < 50*1000, Anno3 == "reg")
data_under200kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size >= 50*1000, size < 200*1000, Anno3 == "reg")
data_over200kb <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"), size >= 200*1000, Anno3 == "reg")
########
data <- data_under50kb
diffCutoff <- 0.2
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
####
num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
data$density <- get_density(data$DMSO, data$dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
p1 <- ggplot(data, aes(x = DMSO, y = dTAG, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, size = 3) +
theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))
####
num.up <- (summary(data$updown_A485_DMSO))["UP"]
num.no <- (summary(data$updown_A485_DMSO))["NO"]
num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
data$density <- get_density(data$DMSO, data$A485, n = 100)
data <- data %>% dplyr::arrange(density)
p2 <- ggplot(data, aes(x = DMSO, y = A485, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
xlim(-0.5, 1) + ylim(-0.5, 1) + coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
annotate("text", x = -0.5, y = 1, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = -0.5, y = 1-0.2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, size = 3) +
theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5))
fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_regunder50kb_pure")
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
name <- "chromo_cons_annoPromoter"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-p"), c("P-P"))
iterate_loop_functions(data, figDir, paste0(name, "_", "p-n"), c("P-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))
colorList <- colorListPromoter
loopList <- c("N-N", "P-N", "P-P")
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList, se = TRUE)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList, se = TRUE)
name <- "chromo_cons_annoEnhancer"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-e"), c("E-E"))
iterate_loop_functions(data, figDir, paste0(name, "_", "e-n"), c("E-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))
colorList <- colorListEnhancer
loopList <- c("N-N", "E-N", "E-E")
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList, se = TRUE)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList, se = TRUE)
name <- "chromo_cons_annoStructure"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
iterate_loop_functions(data, figDir, paste0(name, "_", "all"), unique(data$Anno2))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-s"), c("S-S"))
iterate_loop_functions(data, figDir, paste0(name, "_", "s-n"), c("S-N"))
iterate_loop_functions(data, figDir, paste0(name, "_", "n-n"), c("N-N"))
colorList <- colorListStructure
loopList <- c("N-N", "S-N", "S-S")
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_DMSO"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_DMSO"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList, se = TRUE)
create_score_barplot_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_dTAG"),
loopList, colorList, se = TRUE)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment(temp, figDir, paste0(name, "_A485"),
loopList, colorList, se = TRUE)
data <- fread(here(consensusDir, "chromo_cons_score.tsv")) %>%
dplyr::mutate(distance = start2 - start1,
res = end1 - start1)
p1 <- ggplot(data, aes(x = distance, fill = factor(res))) +
geom_histogram(binwidth = 50000, alpha = 1) +
labs(title = "Distribution of Distance by Resolution",
x = "Distance",
y = "Counts") +
facet_wrap(~ res, ncol = 1, scales = "free") +
scale_x_continuous(labels = label_kb_mb, limits = c(0, 5e6)) +
theme_classic() +
theme(legend.position = "none",
plot.title = element_text(size = 8)) # Removes the legend
png(here(figDir, paste0("consensus_dist_per_res.png")),
res = 600, units = "in", width = 3, height = 6)
print(p1)
dev.off()
p2 <- ggplot(data, aes(x = distance, fill = factor(res))) +
geom_histogram(binwidth = 50000, alpha = 1) +
labs(title = "Distribution of Distance by Resolution",
x = "Distance",
y = "Counts") +
facet_wrap(~ res, ncol = 1, scales = "free") +
scale_x_continuous(labels = label_kb_mb, limits = c(0, 2e6)) +
theme_classic() +
theme(legend.position = "none",
plot.title = element_text(size = 8)) # Removes the legend
png(here(figDir, paste0("consensus_dist_per_res_2mb.png")),
res = 600, units = "in", width = 3, height = 6)
print(p2)
dev.off()
### Distance vs score
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, DMSO, dTAG, A485)
avg_scores <- temp %>%
group_by(distance) %>%
summarise(across(starts_with("DMSO"):starts_with("A485"), mean, na.rm = TRUE))
avg_scores_long <- avg_scores %>%
pivot_longer(cols = DMSO:A485, names_to = "condition", values_to = "avg_score")
avg_scores_long$condition <- factor(avg_scores_long$condition, levels = c("DMSO", "dTAG", "A485"))
# Create the plot
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = condition, fill = condition)) +
geom_smooth(show.legend = TRUE) + ylim(0, 0.5) +
theme_classic() + scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = c("DMSO" = "grey", "dTAG" = "pink", "A485" = "skyblue")) +
scale_fill_manual(values = c("DMSO" = "grey80", "dTAG" = "pink", "A485" = "skyblue")) +
labs(title = paste0("Distance vs. Average Score, ", note),
x = "Distance",
y = "Average Score") +
theme(plot.title = element_text(size = 12))
png(here(figDir, paste0("consensus_dist_vs_avgScore_allRes_pu100pz100_", note, ".png")),
res = 600, units = "in", width = 4, height = 3)
print(p4)
dev.off()
Here, anchor was linked to a gene based on whether the anchor overlaps with TSS +- 2.5kb region
# Annotating genes based on promoter anchor
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
flankSize <- 2500
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")
fwrite(gene.TSS.tb, here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"), sep = "\t", col.names = FALSE, row.names = FALSE)
findOverlapGene <- function(gene.TSS.tb, chrom1, start1, end1){
temp <- gene.TSS.tb %>% dplyr::filter(chr == chrom1) %>%
dplyr::filter((start <= end1) & (end >= start1))
return(temp$gene)
}
findOverlapEnsembl <- function(gene.TSS.tb, chrom1, start1, end1){
temp <- gene.TSS.tb %>% dplyr::filter(chr == chrom1) %>%
dplyr::filter((start <= end1) & (end >= start1))
return(temp$ensembl)
}
temp <- data %>% rowwise() %>%
dplyr::mutate(A1_gene = ifelse(A1 == "P",
list(findOverlapGene(gene.TSS.tb, chrom1, start1, end1)),
NA),
A2_gene = ifelse(A2 == "P",
list(findOverlapGene(gene.TSS.tb, chrom2, start2, end2)),
NA),
gene = list(unique(c(A1_gene, A2_gene))))
temp_p_n <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "P-S", "P-X")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_n, here(consensusDir, paste0(name, "_p-n_geneList.tsv")), sep = "\t")
temp_p_pe <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_pe, here(consensusDir, paste0(name, "_p-pe_geneList.tsv")), sep = "\t")
temp <- data %>% rowwise() %>%
dplyr::mutate(A1_gene = ifelse(A1 == "P",
list(findOverlapEnsembl(gene.TSS.tb, chrom1, start1, end1)),
NA),
A2_gene = ifelse(A2 == "P",
list(findOverlapEnsembl(gene.TSS.tb, chrom2, start2, end2)),
NA),
gene = list(unique(c(A1_gene, A2_gene))))
temp_p_n <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "P-S", "P-X")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_n, here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")), sep = "\t")
temp_p_pe <- temp %>% dplyr::filter(Anno2 %in% c("P-P", "P-E")) %>% dplyr::select(-c("A1_gene", "A2_gene"))
fwrite(temp_p_pe, here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")), sep = "\t")
create_dist_barplot <- function(data, figDir, name, note, loopList, diffCutoff){
temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
dplyr::filter(Anno2 %in% loopList,
updown_dTAG_DMSO %in% c("UP", "NO", "DOWN"))
p <- ggplot(temp, aes(x = updown_dTAG_DMSO, y = distance)) +
geom_violin(aes(fill = updown_dTAG_DMSO)) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = updown_dTAG_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + ggtitle(note) +
scale_y_continuous(labels = label_kb_mb)
fileName <- paste0("size_barplot_", name, "_dTAG_vs_DMSO_", note, "_", diffCutoff)
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
dplyr::filter(Anno2 %in% loopList,
updown_A485_DMSO %in% c("UP", "NO", "DOWN"))
p <- ggplot(temp, aes(x = updown_A485_DMSO, y = distance)) +
geom_violin(aes(fill = updown_A485_DMSO)) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = updown_A485_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + ggtitle(note) +
scale_y_continuous(labels = label_kb_mb)
fileName <- paste0("size_barplot_", name, "_A485_vs_DMSO_", note, "_", diffCutoff)
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
name
diffCutoff <- 0.2
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")),
distance = start2 - start1)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
create_dist_barplot(data, figDir, name, "p-n", c("P-P", "P-E", "P-S", "P-X"), 0.2)
create_dist_barplot(data, figDir, name, "p-pe", c("P-P", "P-E"), 0.2)
create_dist_barplot(data, figDir, name, "pe-pe", c("P-P", "P-E", "E-E"), 0.2)
create_dist_barplot(data, figDir, name, "str", c("S-S", "S-X"), 0.2)
GOdir <- here("../..", "result", "loop", "GO")
dir.create(GOdir, showWarnings = FALSE, recursive = TRUE)
getGO <- function(name, figDir, geneList, categoryNum = 15, height = 10, width = 7){
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
fwrite(as.data.frame(GO), here(GOdir, paste0("GO_", name, "_ensembl.tsv")), sep = "\t")
fwrite(as.data.frame(GO.readable), here(GOdir, paste0("GO_", name, "_readable.tsv")), sep = "\t")
if(nrow(as.data.frame(GO)) != 0){
#####
fileName <- paste0("GO_", name)
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(dotplot(GO, showCategory = categoryNum, title = name) +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
dev.off()
png(here(figDir, paste0(fileName, ".png")), height = height, width = width, res = 600, unit = "in")
print(dotplot(GO, showCategory = categoryNum, title = name) +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
dev.off()
}
}
convPvalue <- function(pvalue){
out <- ifelse(pvalue < 0.0001, "****",
ifelse(pvalue < 0.001, "***",
ifelse(pvalue < 0.01, "**",
ifelse(pvalue < 0.05, "*", "ns"))))
return(out)
}
loadLoopAnnoData <- function(fileName, diffCutoff = 0.2, annoList = c("P-P", "P-E", "P-S", "P-X")){
data <- fread(fileName) %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")),
gene = strsplit(gene, '\\|')) %>%
dplyr::filter(Anno2 %in% annoList)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
return(data)
}
loopType <- "p-n"
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
# geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
# diffCutoff = diffCutoff,
# annoList = c("P-P"))%>%
# dplyr::mutate(distance = start2 - start1)
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id
geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")
#
# p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
# label = ifelse(flag != "0NO", external_gene_name, NA),
# shape = as.factor(maxFlag))) +
# geom_point() + geom_text_repel() + theme_classic() +
# ggtitle("dTAG") +
# geom_hline(yintercept = 0) +
# geom_hline(yintercept = diffCutoff, linetype = "dashed") +
# geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
# geom_vline(xintercept = 0) +
# geom_vline(xintercept = fcCutoff, linetype = "dashed") +
# geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
# scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
# scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
#
# fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff)
# height <- 4
# width <- 7
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
# print(p)
# dev.off()
# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
# fwrite(temp, here(consensusDir, paste0("gene_avgScore_fc_dTAG_", loopType, "_noFCcutoff.tsv")), sep = "\t")
p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point(size = 1, alpha = 1,
stroke = 0) + geom_text_repel() + theme_classic() +
guides(color = "none", shape = "none") +
geom_hline(yintercept = 0, alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_hline(yintercept = c(diffCutoff, -diffCutoff), linetype = "dashed", alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_vline(xintercept = 0, alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_vline(xintercept = c(fcCutoff, -fcCutoff), linetype = "dashed", alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
theme(
axis.title.x = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) + labs(y = "Avg. Δ loop score", x = "log2(shrunken FC)") +
scale_color_manual(values = c("0NO" = "#A9A8A9", "1UP" = "#CB333A", "2DOWN" = "#4852A0")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_noFCcutoff")
height <- 42*mmToInch
width <- 50*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# No FC cutoff option - up and down
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.updown.RNA.noFCcutoff, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_noFCcutoffupdown")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
# ## RNA + PRO-seq
# temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, gene) %>%
# unnest(gene) %>% group_by(gene) %>%
# summarize(mean_diff_score = mean(diff_dTAG_DMSO), .groups = 'drop')
#
# temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
# dplyr::mutate(flag = ifelse(gene %in% geneList.down, "2DOWN",
# ifelse(gene %in% geneList.up, "1UP", "0NO")),
# maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
# shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
# dplyr::arrange(flag)
#
# fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_", loopType, ".tsv"), sep = "\t")
#
# p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
# label = ifelse(flag != "0NO", external_gene_name, NA),
# shape = as.factor(maxFlag))) +
# geom_point() + geom_text_repel() + theme_classic() +
# ggtitle("dTAG") +
# geom_hline(yintercept = 0) +
# geom_hline(yintercept = diffCutoff, linetype = "dashed") +
# geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
# geom_vline(xintercept = 0) +
# geom_vline(xintercept = fcCutoff, linetype = "dashed") +
# geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
# scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
# scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
#
# fileName <- paste0("log2FC_vs_avgScore_dTAG_", loopType, "_diffCutoff_", diffCutoff, "_RNA-PRO")
# height <- 4
# width <- 7
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
# print(p)
# dev.off()
loopType <- "p-pe"
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id
geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_", loopType, "_avgLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_", loopType, "_avgLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id
geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = sum(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_sumLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = sum(log_obsexp_diff_dTAG_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_sumLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id
geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO[which.max(abs(log_obsexp_diff_dTAG_DMSO))], .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_absLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO[which.max(abs(log_obsexp_diff_dTAG_DMSO))], .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_absLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id
geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>%
unnest(gene) %>% group_by(gene) %>%
slice_min(distance, with_ties = FALSE) %>%
summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_closestLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>%
unnest(gene) %>% group_by(gene) %>%
slice_min(distance, with_ties = FALSE) %>%
summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_closestLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
# Loading OE loop score
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id))
geneList.down.RNA <- (diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.down.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff %>% dplyr::filter(shrinked_log2FC < 0))$ensembl_gene_id
geneList.updown.RNA.noFCcutoff <- (diff.RNA.G1.dTAG.noFCcutoff)$ensembl_gene_id
geneList.up <- unique(c((diff.RNA.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G1.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id,
(diff.PRO.G2.dTAG %>% dplyr::filter(shrinked_log2FC > 0))$ensembl_gene_id))
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 2
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>%
unnest(gene) %>% group_by(gene) %>%
slice_max(distance, with_ties = FALSE) %>%
summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
#fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_farthestLogOE")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# No FC cutoff option - down only
temp <- geneAnnoData %>% dplyr::select(log_obsexp_diff_dTAG_DMSO, gene, distance) %>%
unnest(gene) %>% group_by(gene) %>%
slice_max(distance, with_ties = FALSE) %>%
summarize(mean_diff_score = log_obsexp_diff_dTAG_DMSO, .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA.noFCcutoff, "2DOWN",
ifelse(gene %in% c(), "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
p <- ggplot(temp, aes(x = shrinked_log2FC, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("dTAG") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "grey", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_dTAG_OE_p-n_farthestLogOE_noFCcutoff")
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_sumScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-n.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-n_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E"))%>%
dplyr::mutate(distance = start2 - start1)
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-pe.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-pe.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_sumScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-pe.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-pe_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# getting list of genes of interest from RNA-seq and PRO-seq
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.A485 <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-S"))%>%
dplyr::mutate(distance = start2 - start1)
# Testing different ways to calculate representative feature of loops for each gene
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
geneList.down.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC < -fcCutoff))$ensembl_gene_id
geneList.up.RNA <- (diff.RNA.G1.A485 %>% dplyr::filter(shrinked_log2FC > fcCutoff))$ensembl_gene_id
# [1] Average of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_avgScore_fc_dTAG_p-s.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = mean_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_avgScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# [2] Sum of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(sum_diff_score = sum(diff_A485_DMSO), .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_sumScore_fc_dTAG_p-s.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = sum_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_sumScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# [3] Max of differential loop scores for each gene
## RNA only
fcCutoff <- 0.5
alpha <- 0.05
maxLog2FC <- 4
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(max_abs_diff_score = diff_A485_DMSO[which.max(abs(diff_A485_DMSO))], .groups = 'drop')
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% geneList.down.RNA, "2DOWN",
ifelse(gene %in% geneList.up.RNA, "1UP", "0NO")),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
shrlog2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag)
fwrite(temp, here(consensusDir, "gene_maxAbsScore_fc_dTAG_p-s.tsv"), sep = "\t")
p <- ggplot(temp, aes(x = shrlog2fcMax, y = max_abs_diff_score, color = flag,
label = ifelse(flag != "0NO", external_gene_name, NA),
shape = as.factor(maxFlag))) +
geom_point() + geom_text_repel() + theme_classic() +
ggtitle("A485") +
geom_hline(yintercept = 0) +
geom_hline(yintercept = diffCutoff, linetype = "dashed") +
geom_hline(yintercept = - diffCutoff, linetype = "dashed") +
geom_vline(xintercept = 0) +
geom_vline(xintercept = fcCutoff, linetype = "dashed") +
geom_vline(xintercept = -fcCutoff, linetype = "dashed") +
scale_color_manual(values = c("0NO" = "black", "1UP" = "red", "2DOWN" = "blue")) + # Corrected color mapping
scale_shape_manual(values = c("TRUE" = 2, "FALSE" = 19))
fileName <- paste0("log2FC_vs_maxAbsScore_A485_p-s_diffCutoff_", diffCutoff)
height <- 4
width <- 7
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1,
peakID = paste(chrom1, start1, start2, sep = "_"))
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>% summarize(
peak = list(peakID),
anno2 = list(Anno2),
count = n())
ggplot(tempSum, aes(x = count)) + geom_histogram(binwidth = 1) + theme_classic() +
ggtitle("# of P-N loops for each gene") + scale_y_log10()
###################################################################################
# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))
tempSE <- geneAnnoData %>% unnest(gene) %>% group_by(gene) %>% summarize(SE = ifelse(any(AnnoSE == "SE"), 1, 0))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene
tempSE <- tempSE %>% rowwise() %>% dplyr::mutate(
group = ifelse(gene %in% group1, "group1",
ifelse(gene %in% group2, "group2",
ifelse(gene %in% group5, "group5",
ifelse(gene %in% group8, "group8", NA))))) %>%
dplyr::filter(!is.na(group))
perc.group1 <- nrow(tempSE %>% dplyr::filter(group == "group1", SE == 1))/
nrow(tempSE %>% dplyr::filter(group == "group1"))
perc.group2 <- nrow(tempSE %>% dplyr::filter(group == "group2", SE == 1))/
nrow(tempSE %>% dplyr::filter(group == "group2"))
perc.group5 <- nrow(tempSE %>% dplyr::filter(group == "group5", SE == 1))/
nrow(tempSE %>% dplyr::filter(group == "group5"))
perc.group8 <- nrow(tempSE %>% dplyr::filter(group == "group8", SE == 1))/
nrow(tempSE %>% dplyr::filter(group == "group8"))
tempPlot <- tibble(group = c("group1", "group2", "group5", "group8"),
perc = c(perc.group1, perc.group2, perc.group5, perc.group8))
ggplot(tempPlot, aes(x = group, y = perc)) + geom_point() + theme_classic() + ylim(0, 0.2)
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("insulation_score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO")
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1,
peakID = paste(chrom1, start1, start2, sep = "_"))
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
drop_na(shrinked_log2FC)
temp <- temp %>%
dplyr::mutate(group = ifelse(padj < 0.05 & shrinked_log2FC < 0, 1,
ifelse(mean_diff_score < 0, 2, 3)))
fcCutoff <- 0.5
diffCutoff <- 0.2
gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"), sep = "\t")
# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
group = ifelse(gene %in% gene.group1, "group1",
ifelse(gene %in% gene.group2, "group2", NA))
)
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE)
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE)
GO1.df <- as.data.frame(GO1)
GO2.df <- as.data.frame(GO2)
# fwrite(GO1.df, here("GO_binary_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_binary_group2.tsv"), sep = "\t")
GO1.df <- fread(here("GO_binary_group1.tsv"))
GO2.df <- fread(here("GO_binary_group2.tsv"))
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
dplyr::arrange(p.adjust)
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
dplyr::arrange(p.adjust)
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
GOlist <- factor(c("GO:0001824", "GO:0030879", "GO:0021953", "GO:0019827",
"GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055",
"GO:0006397", "GO:0030900", "GO:0008380"))
data <- bind_rows(subset1, subset2) %>%
dplyr::filter(ID %in% GOlist)
descOrder <- sort(unique(data$Description))[c(4, 10, 2, 1, 3, 11, 7, 8, 6, 5, 9)]
pValueLogMax <- 10
data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM) +
scale_size_continuous(range = c(0.5, 2)) + # Set min and max point sizes here
scale_fill_gradient(low = "white", high = "#CB333A",
# limits = c(0, 1),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme_bw() + # Apply theme_bw first, so custom theme settings come after
theme(
panel.background = element_rect(fill = "transparent"), # Override theme_bw panel
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeS, # Ensure size is set for x-axis text
family = fontType,
color = "#000000",
),
axis.text.y = element_text(
size = fontSizeS, # Ensure size is set for y-axis text
family = fontType,
color = "#000000",
lineheight = 0.9 # Allows wrapping for y-axis labels to fit into 2 lines
),
axis.line = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "..", "GO", "GO_groups_binaryGrouping")
width <- panelSize(2.5)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)
temp <- temp %>% dplyr::filter(group %in% c(1, 2))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv12 <- round(getPvalWilcox(temp, 1, 2), 5)
p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")+
annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12),
color = "black", hjust = 0, size = 3)
fileName <- paste0("size_barplot_diffGroup_dTAG_vs_DMSO_binaryGroup")
height <- 3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>% summarize(
peak = list(peakID),
anno2 = list(Anno2),
count = n())
tempSum <- tempSum %>% dplyr::mutate(
group = ifelse(gene %in% gene.group1, "group1",
ifelse(gene %in% gene.group2, "group2", NA))
) %>% dplyr::filter(!is.na(group)) %>% dplyr::filter(group %in% c("group1", "group2"))
#
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$count
distance2 <- (data %>% dplyr::filter(group ==group2) )$count
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)
p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +
scale_y_continuous(breaks = seq(0, 10, by = 2)) +
labs(x = NULL, y = "# of P-N loops per gene") +
coord_cartesian(ylim = c(0, 8)) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
)+
annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
color = "black", hjust = 0, size = 1) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
num_pp = sum(anno2 == "P-P"),
num_pe = sum(anno2 == "P-E"),
num_ps = sum(anno2 == "P-S"),
num_px = sum(anno2 == "P-X"),
ratio_reg = (num_pp + num_pe)/total,
ratio_str = num_ps/total)
#saveRDS(temp2, here(resultDir, "gene_loop_link_A485.rds"))
loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
num_pe = sum(num_pe),
num_ps = sum(num_ps),
num_px = sum(num_px))
loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")
loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))
# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +
scale_y_continuous(breaks = seq(0, 10, by = 2)) +
labs(x = NULL, y = "# of P-P loops per gene") +
coord_cartesian(ylim = c(0, 4)) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
)+
annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
color = "black", hjust = 0, size = 1) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pp_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +
scale_y_continuous(breaks = seq(0, 10, by = 2)) +
labs(x = NULL, y = "# of P-E loops per gene") +
coord_cartesian(ylim = c(0, 4)) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
)+
annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
color = "black", hjust = 0, size = 1) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pe_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA, color = "black", alpha = 0.6,
linewidth = lineThick * mmToLineUnit, lineend = "square", show.legend = FALSE) + theme_classic() +
scale_y_continuous(breaks = seq(0, 10, by = 2)) +
labs(x = NULL, y = "# of P-S loops per gene") +
coord_cartesian(ylim = c(0, 4)) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
)+
annotate("text", x = 1, y = 3, label = paste0("pv12: ", convPvalue(pv12)),
color = "black", hjust = 0, size = 1) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_ps_binaryGroup")
width <- panelSize(0.8)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1,
peakID = paste(chrom1, start1, start2, sep = "_"))
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
drop_na(shrinked_log2FC)
fcCutoff <- 0.5
diffCutoff <- 0.2
#
temp <- temp %>%
dplyr::mutate(group = ifelse(mean_diff_score < -diffCutoff,
ifelse(shrinked_log2FC < -fcCutoff, 1,
ifelse(shrinked_log2FC <fcCutoff, 2, 3)),
ifelse(mean_diff_score < diffCutoff,
ifelse(shrinked_log2FC < -fcCutoff, 4,
ifelse(shrinked_log2FC < fcCutoff, 5, 6)),
ifelse(shrinked_log2FC < -fcCutoff, 7,
ifelse(shrinked_log2FC < fcCutoff, 8, 9)))))
gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
gene.group3 <- (temp %>% dplyr::filter(group == 3))$gene
gene.group4 <- (temp %>% dplyr::filter(group == 4))$gene
gene.group5 <- (temp %>% dplyr::filter(group == 5))$gene
gene.group6 <- (temp %>% dplyr::filter(group == 6))$gene
gene.group7 <- (temp %>% dplyr::filter(group == 7))$gene
gene.group8 <- (temp %>% dplyr::filter(group == 8))$gene
gene.group9 <- (temp %>% dplyr::filter(group == 9))$gene
fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 3)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group3.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 4)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group4.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 5)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 6)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group6.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 7)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group7.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 8)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 9)), here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group9.tsv"), sep = "\t")
# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
group = ifelse(gene %in% gene.group1, "group1",
ifelse(gene %in% gene.group2, "group2",
ifelse(gene %in% gene.group3, "group3",
ifelse(gene %in% gene.group4, "group4",
ifelse(gene %in% gene.group5, "group5",
ifelse(gene %in% gene.group6, "group6",
ifelse(gene %in% gene.group7, "group7",
ifelse(gene %in% gene.group8, "group8",
ifelse(gene %in% gene.group9, "group9", NA)))))))))
)
GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO5 <- enrichGO(gene = gene.group5, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO8 <- enrichGO(gene = gene.group8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO1.df <- as.data.frame(GO1)
GO2.df <- as.data.frame(GO2)
GO3.df <- as.data.frame(GO5)
GO4.df <- as.data.frame(GO8)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "group1") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
subset2 <- GO2.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group2") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
subset3 <- GO3.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group3") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
GOlist <- factor(c("GO:0033002", "GO:0070373", "GO:0048730", "GO:0031103",
"GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055",
"GO:0006397", "GO:0030900", "GO:0008380"))
data <- bind_rows(bind_rows(subset1, subset2), subset3) %>%
dplyr::filter(ID %in% GOlist)
descOrder <- sort(unique(data$Description))[c(1, 2, 5, 7,
3, 4, 6, 8, 9, 10, 11)]
p <- ggplot(data, aes(x = group, y = factor(Description, level = descOrder), color = p.adjust, size = gr)) +
geom_point() + theme_bw() +
scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
scale_size_continuous(range = c(0, 3)) +
labs(x = NULL, y = NULL) +
theme(axis.text = element_text(size = 6), # Set axis text size
axis.title = element_text(size = 6), # Set axis title size (if not removed)
legend.text = element_text(size = 6), # Set legend text size
legend.title = element_text(size = 6))
fileName <- here(figDir, "..", "GO", "GO_groups")
height = 2
width = 3.5
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)
#
temp <- temp %>% dplyr::filter(group %in% c(1, 2, 5, 8))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv12 <- round(getPvalWilcox(temp, 1, 2), 5)
pv15 <- round(getPvalWilcox(temp, 1, 5), 5)
pv18 <- round(getPvalWilcox(temp, 1, 8), 5)
pv25 <- round(getPvalWilcox(temp, 2, 5), 5)
pv28 <- round(getPvalWilcox(temp, 2, 8), 5)
pv58 <- round(getPvalWilcox(temp, 5, 8), 5)
p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12, "\n",
"pv15: ", pv15, "\n",
"pv18: ", pv18, "\n",
"pv25: ", pv25, "\n",
"pv28: ", pv28, "\n",
"pv58: ", pv58, "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("size_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>% summarize(
peak = list(peakID),
anno2 = list(Anno2),
count = n())
tempSum <- tempSum %>% dplyr::mutate(
group = ifelse(gene %in% gene.group1, "group1",
ifelse(gene %in% gene.group2, "group2",
ifelse(gene %in% gene.group3, "group3",
ifelse(gene %in% gene.group4, "group4",
ifelse(gene %in% gene.group5, "group5",
ifelse(gene %in% gene.group6, "group6",
ifelse(gene %in% gene.group7, "group7",
ifelse(gene %in% gene.group8, "group8",
ifelse(gene %in% gene.group9, "group9", NA)))))))))
) %>%
dplyr::filter(group %in% c("group1", "group2", "group5", "group8"))
# TEMP START
#geneList.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))
#geneList.group1.temp <- geneList.group1 %>% dplyr::left_join(tempSum, by = c("gene"))
# TEMP END
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$count
distance2 <- (data %>% dplyr::filter(group ==group2) )$count
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)
p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 10, by = 2), limits = c(0, 10)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
"pv15: ", pv15, "\n",
"pv18: ", pv18, "\n",
"pv25: ", pv25, "\n",
"pv28: ", pv28, "\n",
"pv58: ", pv58, "\n"),
color = "black", hjust = 0, size = 1) + theme(legend.position = "none")
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
num_pp = sum(anno2 == "P-P"),
num_pe = sum(anno2 == "P-E"),
num_ps = sum(anno2 == "P-S"),
num_px = sum(anno2 == "P-X"),
ratio_reg = (num_pp + num_pe)/total,
ratio_str = num_ps/total)
saveRDS(temp2, here(resultDir, "gene_loop_link.rds"))
loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
num_pe = sum(num_pe),
num_ps = sum(num_ps),
num_px = sum(num_px))
loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")
loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))
# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
"pv15: ", pv15, "\n",
"pv18: ", pv18, "\n",
"pv25: ", pv25, "\n",
"pv28: ", pv28, "\n",
"pv58: ", pv58, "\n"),
color = "black", hjust = 0, size = 1)+ theme(legend.position = "none")
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pp")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
"pv15: ", pv15, "\n",
"pv18: ", pv18, "\n",
"pv25: ", pv25, "\n",
"pv28: ", pv28, "\n",
"pv58: ", pv58, "\n"),
color = "black", hjust = 0, size = 1)+ theme(legend.position = "none")
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_pe")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
"pv15: ", pv15, "\n",
"pv18: ", pv18, "\n",
"pv25: ", pv25, "\n",
"pv28: ", pv28, "\n",
"pv58: ", pv58, "\n"),
color = "black", hjust = 0, size = 1)+ theme(legend.position = "none")
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_ps")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-X
data <- temp2 %>% dplyr::select(group, num_px)
colnames(data) <- c("group", "count")
pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
"pv15: ", pv15, "\n",
"pv18: ", pv18, "\n",
"pv25: ", pv25, "\n",
"pv28: ", pv28, "\n",
"pv58: ", pv58, "\n"),
color = "black", hjust = 0, size = 1)+ theme(legend.position = "none")
fileName <- paste0("count_barplot_diffGroup_dTAG_vs_DMSO_px")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1,
peakID = paste(chrom1, start1, start2, sep = "_"))
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
drop_na(shrinked_log2FC)
fcCutoff <- 0.5
diffCutoff <- 0.2
#
temp <- temp %>%
dplyr::mutate(group = ifelse(mean_diff_score < -diffCutoff,
ifelse(shrinked_log2FC < -fcCutoff, 1,
ifelse(shrinked_log2FC <fcCutoff, 2, 3)),
ifelse(mean_diff_score < diffCutoff,
ifelse(shrinked_log2FC < -fcCutoff, 4,
ifelse(shrinked_log2FC < fcCutoff, 5, 6)),
ifelse(shrinked_log2FC < -fcCutoff, 7,
ifelse(shrinked_log2FC < fcCutoff, 8, 9)))))
gene.group1 <- (temp %>% dplyr::filter(group == 1))$gene
gene.group2 <- (temp %>% dplyr::filter(group == 2))$gene
gene.group3 <- (temp %>% dplyr::filter(group == 3))$gene
gene.group4 <- (temp %>% dplyr::filter(group == 4))$gene
gene.group5 <- (temp %>% dplyr::filter(group == 5))$gene
gene.group6 <- (temp %>% dplyr::filter(group == 6))$gene
gene.group7 <- (temp %>% dplyr::filter(group == 7))$gene
gene.group8 <- (temp %>% dplyr::filter(group == 8))$gene
gene.group9 <- (temp %>% dplyr::filter(group == 9))$gene
fwrite((temp %>% dplyr::filter(group == 1)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 2)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 3)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 4)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 5)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 6)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 7)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 8)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"), sep = "\t")
fwrite((temp %>% dplyr::filter(group == 9)), here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"), sep = "\t")
# Adding group information to geneAnno
geneAnnoData <- geneAnnoData %>% unnest(gene) %>% dplyr::mutate(
group = ifelse(gene %in% gene.group1, "group1",
ifelse(gene %in% gene.group2, "group2",
ifelse(gene %in% gene.group3, "group3",
ifelse(gene %in% gene.group4, "group4",
ifelse(gene %in% gene.group5, "group5",
ifelse(gene %in% gene.group6, "group6",
ifelse(gene %in% gene.group7, "group7",
ifelse(gene %in% gene.group8, "group8",
ifelse(gene %in% gene.group9, "group9", NA)))))))))
)
GO1 <- enrichGO(gene = gene.group1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO2 <- enrichGO(gene = gene.group2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO3 <- enrichGO(gene = gene.group3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO4 <- enrichGO(gene = gene.group4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO5 <- enrichGO(gene = gene.group5, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO6 <- enrichGO(gene = gene.group6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO7 <- enrichGO(gene = gene.group7, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO8 <- enrichGO(gene = gene.group8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO9 <- enrichGO(gene = gene.group9, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
dotplot(GO1, showCategory = 10)
#
# GO1.df <- as.data.frame(GO1)
# GO2.df <- as.data.frame(GO2)
# GO3.df <- as.data.frame(GO5)
# GO4.df <- as.data.frame(GO8)
#
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "group1") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group2") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description,GeneRatio, p.adjust) %>% dplyr::mutate(group = "group3") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
#
# GOlist <- factor(c("GO:0033002", "GO:0070373", "GO:0048730", "GO:0031103",
# "GO:0022613", "GO:0050767", "GO:0034470", "GO:0016055",
# "GO:0006397", "GO:0030900", "GO:0008380"))
#
# data <- bind_rows(bind_rows(subset1, subset2), subset3) %>%
# dplyr::filter(ID %in% GOlist)
#
# descOrder <- sort(unique(data$Description))[c(1, 2, 5, 7,
# 3, 4, 6, 8, 9, 10, 11)]
#
# p <- ggplot(data, aes(x = group, y = factor(Description, level = descOrder), color = p.adjust, size = gr)) +
# geom_point() + theme_bw() +
# scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
# scale_size_continuous(range = c(0, 3)) +
# labs(x = NULL, y = NULL) +
# theme(axis.text = element_text(size = 6), # Set axis text size
# axis.title = element_text(size = 6), # Set axis title size (if not removed)
# legend.text = element_text(size = 6), # Set legend text size
# legend.title = element_text(size = 6))
#
# fileName <- here(figDir, "..", "GO", "GO_groups")
# height = 2
# width = 3.5
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
# Checking average distance of loops per gene
# temp is a tibble where delta loop and log2fc are merged
temp$group <- factor(temp$group)
#
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(group ==group1) )$mean_distance
# distance2 <- (data %>% dplyr::filter(group ==group2) )$mean_distance
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
# pv12 <- round(getPvalWilcox(temp, 1, 2), 5)
# pv15 <- round(getPvalWilcox(temp, 1, 5), 5)
# pv18 <- round(getPvalWilcox(temp, 1, 8), 5)
# pv25 <- round(getPvalWilcox(temp, 2, 5), 5)
# pv28 <- round(getPvalWilcox(temp, 2, 8), 5)
# pv58 <- round(getPvalWilcox(temp, 5, 8), 5)
p <- ggplot(temp, aes(x = group, y = mean_distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() + scale_y_continuous(labels = label_kb_mb) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
# annotate("text", x = 1, y = 1000000, label = paste0("pv12: ", pv12, "\n",
# "pv15: ", pv15, "\n",
# "pv18: ", pv18, "\n",
# "pv25: ", pv25, "\n",
# "pv28: ", pv28, "\n",
# "pv58: ", pv58, "\n"),
# color = "black", hjust = 0, size = 3)
#
fileName <- paste0("size_barplot_diffGroup_A485_vs_DMSO")
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# Counting number of loop per genes
tempSum <- geneAnnoData %>% dplyr::select(peakID, gene, Anno2) %>% unnest(gene) %>% group_by(gene) %>% summarize(
peak = list(peakID),
anno2 = list(Anno2),
count = n())
tempSum <- tempSum %>% dplyr::mutate(
group = ifelse(gene %in% gene.group1, "group1",
ifelse(gene %in% gene.group2, "group2",
ifelse(gene %in% gene.group3, "group3",
ifelse(gene %in% gene.group4, "group4",
ifelse(gene %in% gene.group5, "group5",
ifelse(gene %in% gene.group6, "group6",
ifelse(gene %in% gene.group7, "group7",
ifelse(gene %in% gene.group8, "group8",
ifelse(gene %in% gene.group9, "group9", NA)))))))))
) %>% dplyr::filter(!is.na(group))
#
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(group ==group1) )$count
# distance2 <- (data %>% dplyr::filter(group ==group2) )$count
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
#
# pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)
p <- ggplot(tempSum, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 10, by = 2)) +
coord_cartesian(ylim = c(0, 10)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme(legend.position = "none")
# annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
# "pv15: ", pv15, "\n",
# "pv18: ", pv18, "\n",
# "pv25: ", pv25, "\n",
# "pv28: ", pv28, "\n",
# "pv58: ", pv58, "\n"),
# color = "black", hjust = 0, size = 1) +
fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO")
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#######
temp2 <- tempSum %>% rowwise() %>% mutate(total = length(anno2),
num_pp = sum(anno2 == "P-P"),
num_pe = sum(anno2 == "P-E"),
num_ps = sum(anno2 == "P-S"),
num_px = sum(anno2 == "P-X"),
ratio_reg = (num_pp + num_pe)/total,
ratio_str = num_ps/total)
saveRDS(temp2, here(resultDir, "gene_loop_link_A485.rds"))
loopType <- temp2 %>% group_by(group) %>% summarise(num_pp = sum(num_pp),
num_pe = sum(num_pe),
num_ps = sum(num_ps),
num_px = sum(num_px))
loopTypeLong <- loopType %>% pivot_longer(-group, names_to = "type", values_to = "count")
loopTypeLong$type <- factor(loopTypeLong$type, levels = c("num_pp", "num_pe", "num_ps", "num_px"))
# Plotting
ggplot(loopTypeLong, aes(fill=type, y=count, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
########
#P-P
data <- temp2 %>% dplyr::select(group, num_pp)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme(legend.position = "none")
# annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
# "pv15: ", pv15, "\n",
# "pv18: ", pv18, "\n",
# "pv25: ", pv25, "\n",
# "pv28: ", pv28, "\n",
# "pv58: ", pv58, "\n"),
# color = "black", hjust = 0, size = 1)
fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_pp")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-E
data <- temp2 %>% dplyr::select(group, num_pe)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme(legend.position = "none")
# annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
# "pv15: ", pv15, "\n",
# "pv18: ", pv18, "\n",
# "pv25: ", pv25, "\n",
# "pv28: ", pv28, "\n",
# "pv58: ", pv58, "\n"),
# color = "black", hjust = 0, size = 1)
fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_pe")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-S
data <- temp2 %>% dplyr::select(group, num_ps)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme(legend.position = "none")
# annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
# "pv15: ", pv15, "\n",
# "pv18: ", pv18, "\n",
# "pv25: ", pv25, "\n",
# "pv28: ", pv28, "\n",
# "pv58: ", pv58, "\n"),
# color = "black", hjust = 0, size = 1)+
fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_ps")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########
#P-X
data <- temp2 %>% dplyr::select(group, num_px)
colnames(data) <- c("group", "count")
# pv12 <- round(getPvalWilcox(data,"group1", "group2"), 5)
# pv15 <- round(getPvalWilcox(data,"group1", "group5"), 5)
# pv18 <- round(getPvalWilcox(data,"group1", "group8"), 5)
# pv25 <- round(getPvalWilcox(data,"group2", "group5"), 5)
# pv28 <- round(getPvalWilcox(data,"group2", "group8"), 5)
# pv58 <- round(getPvalWilcox(data,"group5", "group8"), 5)
p <- ggplot(data, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 5, by = 2), limits = c(0, 5)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme(legend.position = "none")
# annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
# "pv15: ", pv15, "\n",
# "pv18: ", pv18, "\n",
# "pv25: ", pv25, "\n",
# "pv28: ", pv28, "\n",
# "pv58: ", pv58, "\n"),
# color = "black", hjust = 0, size = 1)+
fileName <- paste0("count_barplot_diffGroup_A485_vs_DMSO_px")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
name <- "chromo_cons_annoHierarchy"
alpha <- 0.05
fcCutoff <- 0.5
diff.PRO.G1.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G1.dTAG_vs_G1.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.PRO.G2.dTAG <- fread(here(refDir, "diff_G1G2.dTAG_G2.dTAG_vs_G2.DMSO_PROseq.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
############
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1,
peakID = paste(chrom1, start1, start2, sep = "_"))
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
pnOver = ifelse(gene %in% pnOver8, "pnOver8",
ifelse(gene %in% pnOver6, "pnOver6",
ifelse(gene %in% pnOver4, "pnOver4",
ifelse(gene %in% pnOver2, "pnOver2",
ifelse(gene %in% pnOver0, "pnOver0", NA)))))) %>%
drop_na(pnOver)
GOfigDir <- here(figDir, "../GO")
getGO("pnOver8", GOfigDir, pnOver8)
getGO("pnOver6", GOfigDir, pnOver6)
getGO("pnOver4", GOfigDir, pnOver4)
getGO("pnOver2", GOfigDir, pnOver2)
getGO("pnOver0", GOfigDir, pnOver0)
#####################
GO0.df <- as.data.frame(enrichGO(gene = pnOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO2.df <- as.data.frame(enrichGO(gene = pnOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO4.df <- as.data.frame(enrichGO(gene = pnOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO6.df <- as.data.frame(enrichGO(gene = pnOver6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
GO8.df <- as.data.frame(enrichGO(gene = pnOver8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver0") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver2") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver4") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
subset6 <- GO6.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver6") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
subset8 <- GO8.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver8") %>%
dplyr::mutate(
gr = sapply(GeneRatio, function(x) {
# Split the string by "/"
parts <- unlist(strsplit(x, "/"))
# Convert to numeric and perform the division
as.numeric(parts[1]) / as.numeric(parts[2])
})
) %>% dplyr::arrange(desc(gr))
GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
"GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165",
"GO:0072001", "GO:0007517", "GO:0048705"))
data <- bind_rows(bind_rows(bind_rows(subset0, subset2), subset4), subset8) %>%
dplyr::filter(ID %in% GOlist)
p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) +
geom_point() + theme_bw() +
scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
scale_size_continuous(range = c(0, 3)) +
labs(x = NULL, y = NULL) +
theme(axis.text = element_text(size = 6), # Set axis text size
axis.title = element_text(size = 6), # Set axis title size (if not removed)
legend.text = element_text(size = 6), # Set legend text size
legend.title = element_text(size = 6))
fileName <- here(figDir, "..", "GO", "GO_groups_pn")
height = 2
width = 3.3
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv02 <- round(getPvalWilcox(temp, "pnOver0", "pnOver2"), 5)
pv24 <- round(getPvalWilcox(temp, "pnOver2", "pnOver4"), 5)
pv46 <- round(getPvalWilcox(temp, "pnOver4", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(temp, "pnOver0", "pnOver8"), 5)
pv28 <- round(getPvalWilcox(temp, "pnOver2", "pnOver8"), 5)
pv48 <- round(getPvalWilcox(temp, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(temp, "pnOver6", "pnOver8"), 5)
p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0.2)+
annotate("text", x = 1, y = 0, label = paste0("pv02: ", convPvalue(pv02), "\n",
"pv24: ", convPvalue(pv24), "\n",
"pv46: ", convPvalue(pv46), "\n",
"pv68: ", convPvalue(pv68), "\n",
"pv48: ", convPvalue(pv48), "\n",
"pv28: ", convPvalue(pv28), "\n",
"pv08: ", convPvalue(pv08), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("diffScore_barplot_pnGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$log2FoldChange
distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$log2FoldChange
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
pv02 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver2"), 5)
pv04 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver4"), 5)
pv06 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver8"), 5)
pv24 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver4"), 5)
pv26 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver6"), 5)
pv28 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver8"), 5)
pv46 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver6"), 5)
pv48 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(tempDown, "pnOver6", "pnOver8"), 5)
p <- ggplot(tempDown, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)+
annotate("text", x = 1, y = -0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
"pv04: ", convPvalue(pv04), "\n",
"pv06: ", convPvalue(pv06), "\n",
"pv08: ", convPvalue(pv08), "\n",
"pv24: ", convPvalue(pv24), "\n",
"pv26: ", convPvalue(pv26), "\n",
"pv28: ", convPvalue(pv28), "\n",
"pv46: ", convPvalue(pv46), "\n",
"pv48: ", convPvalue(pv48), "\n",
"pv68: ", convPvalue(pv68), "\n"),
color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_pnGroup_dTAG_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
pv02 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver2"), 5)
pv04 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver4"), 5)
pv06 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver6"), 5)
pv08 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver8"), 5)
pv24 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver4"), 5)
pv26 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver6"), 5)
pv28 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver8"), 5)
pv46 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver6"), 5)
pv48 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver8"), 5)
pv68 <- round(getPvalWilcox(tempUp, "pnOver6", "pnOver8"), 5)
p <- ggplot(tempUp, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)+
annotate("text", x = 1, y = 0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
"pv04: ", convPvalue(pv04), "\n",
"pv06: ", convPvalue(pv06), "\n",
"pv08: ", convPvalue(pv08), "\n",
"pv24: ", convPvalue(pv24), "\n",
"pv26: ", convPvalue(pv26), "\n",
"pv28: ", convPvalue(pv28), "\n",
"pv46: ", convPvalue(pv46), "\n",
"pv48: ", convPvalue(pv48), "\n",
"pv68: ", convPvalue(pv68), "\n"),
color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_pnGroup_dTAG_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
psOver = ifelse(gene %in% psOver4, "psOver4",
ifelse(gene %in% psOver3, "psOver3",
ifelse(gene %in% psOver2, "psOver2",
ifelse(gene %in% psOver1, "psOver1",
ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
drop_na(psOver)
GOfigDir <- here(figDir, "../GO")
#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
fwrite(GO0.df, here("GO_PS_group0.tsv"), sep = "\t")
fwrite(GO1.df, here("GO_PS_group1.tsv"), sep = "\t")
fwrite(GO2.df, here("GO_PS_group2.tsv"), sep = "\t")
fwrite(GO3.df, here("GO_PS_group3.tsv"), sep = "\t")
fwrite(GO4.df, here("GO_PS_group4.tsv"), sep = "\t")
GO0.df <- fread(here("GO_PS_group0.tsv"))
GO1.df <- fread(here("GO_PS_group1.tsv"))
GO2.df <- fread(here("GO_PS_group2.tsv"))
GO3.df <- fread(here("GO_PS_group3.tsv"))
GO4.df <- fread(here("GO_PS_group4.tsv"))
subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
dplyr::arrange(p.adjust)
subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
"GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664"))
data <- bind_rows(bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset3), subset4) %>%
dplyr::filter(ID %in% GOlist)
descOrder <- sort(unique(data$Description))[rev(c(10, 4, 5, 9, 11,
2, 6,
3, 1, 8, 7))]
data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM) +
scale_size_continuous(range = c(0.5, 2)) + # Set min and max point sizes here
scale_fill_gradient(low = "white", high = "#CB333A",
# limits = c(0, 1),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme_bw() + # Apply theme_bw first, so custom theme settings come after
theme(
panel.background = element_rect(fill = "transparent"), # Override theme_bw panel
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeS, # Ensure size is set for x-axis text
family = fontType,
color = "#000000",
),
axis.text.y = element_text(
size = fontSizeS, # Ensure size is set for y-axis text
family = fontType,
color = "#000000",
lineheight = 0.9 # Allows wrapping for y-axis labels to fit into 2 lines
),
axis.line = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "..", "GO", "GO_groups_ps")
width <- panelSize(2.7)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) +
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) +
scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
coord_cartesian(ylim = c(-0.5, 0.1))
fileName <- paste0("diffScore_barplot_psGroup_dTAG_vs_DMSO_2")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
ks_result1 <- ks.test(
temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
temp %>% dplyr::filter(psOver == "psOver3") %>% pull(absLog2FC)
)
ks_result2 <- ks.test(
temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
temp %>% dplyr::filter(psOver == "psOver2") %>% pull(absLog2FC)
)
ks_result3 <- ks.test(
temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
temp %>% dplyr::filter(psOver == "psOver1") %>% pull(absLog2FC)
)
ks_result4 <- ks.test(
temp %>% dplyr::filter(psOver == "psOver4") %>% pull(absLog2FC),
temp %>% dplyr::filter(psOver == "psOver0") %>% pull(absLog2FC)
)
p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
labs(
x = "Abs. log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.position = "none",
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_psGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)
p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_psGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_psGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
psOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
psOver = ifelse(gene %in% psOver4, "psOver4",
ifelse(gene %in% psOver3, "psOver3",
ifelse(gene %in% psOver2, "psOver2",
ifelse(gene %in% psOver1, "psOver1",
ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
drop_na(psOver)
GOfigDir <- here(figDir, "../GO")
#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
# fwrite(GO0.df, here("GO_PE_group0.tsv"), sep = "\t")
# fwrite(GO1.df, here("GO_PE_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_PE_group2.tsv"), sep = "\t")
# fwrite(GO3.df, here("GO_PE_group3.tsv"), sep = "\t")
# fwrite(GO4.df, here("GO_PE_group4.tsv"), sep = "\t")
subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
dplyr::arrange(p.adjust)
subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
"GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664",
"GO:0040029"))
data <- bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset4) %>%
dplyr::filter(ID %in% GOlist)
descOrder <- sort(unique(data$Description))[rev(c(11, 5, 6, 10, 12,
1,
3, 7, 4, 2, 9, 8))]
empty_row3 <- data.frame(
ID = NA, # No specific ID
Description = NA, # No description
GeneRatio = NA, # No gene ratio
p.adjust = NA, # No p.adjust value
group = "Group 3" # Group to add as empty column
)
empty_row4 <- data.frame(
ID = NA, # No specific ID
Description = NA, # No description
GeneRatio = NA, # No gene ratio
p.adjust = NA, # No p.adjust value
group = "Group 4" # Group to add as empty column
)
# Append the empty row to your dataset
data <- rbind(data, empty_row3, empty_row4)
data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM) +
scale_size_continuous(range = c(0.5, 2)) + # Set min and max point sizes here
scale_fill_gradient(low = "white", high = "#CB333A",
# limits = c(0, 1),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme_bw() + # Apply theme_bw first, so custom theme settings come after
theme(
panel.background = element_rect(fill = "transparent"), # Override theme_bw panel
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeS, # Ensure size is set for x-axis text
family = fontType,
color = "#000000",
),
axis.text.y = element_text(
size = fontSizeS, # Ensure size is set for y-axis text
family = fontType,
color = "#000000",
lineheight = 0.9 # Allows wrapping for y-axis labels to fit into 2 lines
),
axis.line = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "..", "GO", "GO_groups_pe")
width <- panelSize(2.65)*mmToInch
height <- panelSize(1.3)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) +
scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) +
coord_cartesian(ylim = c(-0.5, 0.1))
fileName <- paste0("diffScore_barplot_peGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
labs(
x = "Abs. log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.position = "none",
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_peGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)
p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_peGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_peGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
psOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
psOver = ifelse(gene %in% psOver4, "psOver4",
ifelse(gene %in% psOver3, "psOver3",
ifelse(gene %in% psOver2, "psOver2",
ifelse(gene %in% psOver1, "psOver1",
ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
drop_na(psOver)
GOfigDir <- here(figDir, "../GO")
#####################
GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP", readable = TRUE))
#
# fwrite(GO0.df, here("GO_PP_group0.tsv"), sep = "\t")
# fwrite(GO1.df, here("GO_PP_group1.tsv"), sep = "\t")
# fwrite(GO2.df, here("GO_PP_group2.tsv"), sep = "\t")
# fwrite(GO3.df, here("GO_PP_group3.tsv"), sep = "\t")
# fwrite(GO4.df, here("GO_PP_group4.tsv"), sep = "\t")
subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 0") %>%
dplyr::arrange(p.adjust)
subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 1") %>%
dplyr::arrange(p.adjust)
subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 2") %>%
dplyr::arrange(p.adjust)
subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 3") %>%
dplyr::arrange(p.adjust)
subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "Group 4") %>%
dplyr::arrange(p.adjust)
subset0$GeneRatio <- sapply(strsplit(subset0$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset1$GeneRatio <- sapply(strsplit(subset1$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset2$GeneRatio <- sapply(strsplit(subset2$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset3$GeneRatio <- sapply(strsplit(subset3$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
subset4$GeneRatio <- sapply(strsplit(subset4$GeneRatio, "/"), function(x) as.numeric(x[1]) / as.numeric(x[2]))
GOlist <- factor(c("GO:0008380", "GO:0006397", "GO:0034470", "GO:0022613", "GO:0016055",
"GO:0061138", "GO:0060562", "GO:0007389", "GO:0060485", "GO:0048638", "GO:0045664"))
data <- bind_rows(subset0, subset1) %>%
dplyr::filter(ID %in% GOlist)
empty_row2 <- data.frame(
ID = NA, # No specific ID
Description = NA, # No description
GeneRatio = NA, # No gene ratio
p.adjust = NA, # No p.adjust value
group = "Group 2" # Group to add as empty column
)
empty_row3 <- data.frame(
ID = NA, # No specific ID
Description = NA, # No description
GeneRatio = NA, # No gene ratio
p.adjust = NA, # No p.adjust value
group = "Group 3" # Group to add as empty column
)
empty_row4 <- data.frame(
ID = NA, # No specific ID
Description = NA, # No description
GeneRatio = NA, # No gene ratio
p.adjust = NA, # No p.adjust value
group = "Group 4" # Group to add as empty column
)
# Append the empty row to your dataset
data <- rbind(data, empty_row2, empty_row3, empty_row4)
descOrder <- sort(unique(data$Description))[rev(c(12,
4, 5, 9, 11, 10,
2, 6, 3, 1, 8, 7))]
data <- data %>% dplyr::filter(Description %in% descOrder)
data <- data %>% dplyr::rowwise() %>% dplyr::mutate(pValueLog = min(-log10(p.adjust), pValueLogMax))
p <- ggplot(data, aes(x = group, y = factor(Description, levels = descOrder), size = pValueLog, fill = GeneRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM) +
scale_size_continuous(range = c(0.5, 2)) + # Set min and max point sizes here
scale_fill_gradient(low = "white", high = "#CB333A",
# limits = c(0, 1),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme_bw() + # Apply theme_bw first, so custom theme settings come after
theme(
panel.background = element_rect(fill = "transparent"), # Override theme_bw panel
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeS, # Ensure size is set for x-axis text
family = fontType,
color = "#000000",
),
axis.text.y = element_text(
size = fontSizeS, # Ensure size is set for y-axis text
family = fontType,
color = "#000000",
lineheight = 0.9 # Allows wrapping for y-axis labels to fit into 2 lines
),
axis.line = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick * mmToLineUnit,
lineend = "square"
),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "..", "GO", "GO_groups_pp")
width <- panelSize(2.65)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) +
scale_fill_manual(values = rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) +
coord_cartesian(ylim = c(-0.5, 0.1))
fileName <- paste0("diffScore_barplot_ppGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = psOver)) +
scale_color_manual(values = (rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")))) +
stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
labs(
x = "Abs. log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.position = "none",
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_ppGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)
p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_ppGroup_dTAG_vs_DMSO_down")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) + # Five shades of grey
geom_violin(aes(fill = psOver),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "log2(fold change)") +
stat_summary(
aes(group = psOver), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_ppGroup_dTAG_vs_DMSO_up")
width <- panelSize(1.18)*mmToInch
height <- panelSize(1)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
############
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1,
peakID = paste(chrom1, start1, start2, sep = "_"))
# temp2
#calculating diff score and log2fc distribution based on p-n numbers
temp2 <- readRDS(here(resultDir, "gene_loop_link_A485.rds"))
pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
pnOver = ifelse(gene %in% pnOver8, "pnOver8",
ifelse(gene %in% pnOver6, "pnOver6",
ifelse(gene %in% pnOver4, "pnOver4",
ifelse(gene %in% pnOver2, "pnOver2",
ifelse(gene %in% pnOver0, "pnOver0", NA)))))) %>%
drop_na(pnOver)
# GOfigDir <- here(figDir, "../GO")
# getGO("pnOver8", GOfigDir, pnOver8)
# getGO("pnOver6", GOfigDir, pnOver6)
# getGO("pnOver4", GOfigDir, pnOver4)
# getGO("pnOver2", GOfigDir, pnOver2)
# getGO("pnOver0", GOfigDir, pnOver0)
#
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = pnOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = pnOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = pnOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO6.df <- as.data.frame(enrichGO(gene = pnOver6, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO8.df <- as.data.frame(enrichGO(gene = pnOver8, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
#
#
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver0") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver2") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver4") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset6 <- GO6.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver6") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset8 <- GO8.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "pnOver8") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
#
#
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
# "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165",
# "GO:0072001", "GO:0007517", "GO:0048705"))
#
# data <- bind_rows(bind_rows(bind_rows(subset0, subset2), subset4), subset8) %>%
# dplyr::filter(ID %in% GOlist)
#
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) +
# geom_point() + theme_bw() +
# scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
# scale_size_continuous(range = c(0, 3)) +
# labs(x = NULL, y = NULL) +
# theme(axis.text = element_text(size = 6), # Set axis text size
# axis.title = element_text(size = 6), # Set axis title size (if not removed)
# legend.text = element_text(size = 6), # Set legend text size
# legend.title = element_text(size = 6))
#
# fileName <- here(figDir, "..", "GO", "GO_groups_pn")
# height = 2
# width = 3.3
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
# distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
# pv02 <- round(getPvalWilcox(temp, "pnOver0", "pnOver2"), 5)
# pv24 <- round(getPvalWilcox(temp, "pnOver2", "pnOver4"), 5)
# pv46 <- round(getPvalWilcox(temp, "pnOver4", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(temp, "pnOver0", "pnOver8"), 5)
# pv28 <- round(getPvalWilcox(temp, "pnOver2", "pnOver8"), 5)
# pv48 <- round(getPvalWilcox(temp, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(temp, "pnOver6", "pnOver8"), 5)
p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0, label = paste0("pv02: ", convPvalue(pv02), "\n",
# "pv24: ", convPvalue(pv24), "\n",
# "pv46: ", convPvalue(pv46), "\n",
# "pv68: ", convPvalue(pv68), "\n",
# "pv48: ", convPvalue(pv48), "\n",
# "pv28: ", convPvalue(pv28), "\n",
# "pv08: ", convPvalue(pv08), "\n"),
# color = "black", hjust = 0, size = 3)
fileName <- paste0("diffScore_barplot_pnGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$log2FoldChange
# distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$log2FoldChange
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#
# pv02 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver2"), 5)
# pv04 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver4"), 5)
# pv06 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(tempDown, "pnOver0", "pnOver8"), 5)
# pv24 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver4"), 5)
# pv26 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver6"), 5)
# pv28 <- round(getPvalWilcox(tempDown, "pnOver2", "pnOver8"), 5)
# pv46 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver6"), 5)
# pv48 <- round(getPvalWilcox(tempDown, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(tempDown, "pnOver6", "pnOver8"), 5)
p <- ggplot(tempDown, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = -0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
# "pv04: ", convPvalue(pv04), "\n",
# "pv06: ", convPvalue(pv06), "\n",
# "pv08: ", convPvalue(pv08), "\n",
# "pv24: ", convPvalue(pv24), "\n",
# "pv26: ", convPvalue(pv26), "\n",
# "pv28: ", convPvalue(pv28), "\n",
# "pv46: ", convPvalue(pv46), "\n",
# "pv48: ", convPvalue(pv48), "\n",
# "pv68: ", convPvalue(pv68), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_pnGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#
# pv02 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver2"), 5)
# pv04 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver4"), 5)
# pv06 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver6"), 5)
# pv08 <- round(getPvalWilcox(tempUp, "pnOver0", "pnOver8"), 5)
# pv24 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver4"), 5)
# pv26 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver6"), 5)
# pv28 <- round(getPvalWilcox(tempUp, "pnOver2", "pnOver8"), 5)
# pv46 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver6"), 5)
# pv48 <- round(getPvalWilcox(tempUp, "pnOver4", "pnOver8"), 5)
# pv68 <- round(getPvalWilcox(tempUp, "pnOver6", "pnOver8"), 5)
#
p <- ggplot(tempUp, aes(x = pnOver, y = log2FoldChange)) + geom_violin(aes(fill = pnOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = pnOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0.5, label = paste0("pv02: ", convPvalue(pv02), "\n",
# "pv04: ", convPvalue(pv04), "\n",
# "pv06: ", convPvalue(pv06), "\n",
# "pv08: ", convPvalue(pv08), "\n",
# "pv24: ", convPvalue(pv24), "\n",
# "pv26: ", convPvalue(pv26), "\n",
# "pv28: ", convPvalue(pv28), "\n",
# "pv46: ", convPvalue(pv46), "\n",
# "pv48: ", convPvalue(pv48), "\n",
# "pv68: ", convPvalue(pv68), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_pnGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
psOver = ifelse(gene %in% psOver4, "psOver4",
ifelse(gene %in% psOver3, "psOver3",
ifelse(gene %in% psOver2, "psOver2",
ifelse(gene %in% psOver1, "psOver1",
ifelse(gene %in% psOver0, "psOver0", NA)))))) %>%
drop_na(psOver)
# GOfigDir <- here(figDir, "../GO")
# getGO("psOver4", GOfigDir, psOver4)
# getGO("psOver3", GOfigDir, psOver3)
# getGO("psOver2", GOfigDir, psOver2)
# getGO("psOver1", GOfigDir, psOver1)
# getGO("psOver0", GOfigDir, psOver0)
#
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
#
#
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
#
#
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
# "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165",
# "GO:0072001", "GO:0007517", "GO:0048705", "GO:0003002"))
#
# data <- bind_rows(bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset3), subset4) %>%
# dplyr::filter(ID %in% GOlist)
#
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) +
# geom_point() + theme_bw() +
# scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
# scale_size_continuous(range = c(0, 3)) +
# labs(x = NULL, y = NULL) +
# theme(axis.text = element_text(size = 6), # Set axis text size
# axis.title = element_text(size = 6), # Set axis title size (if not removed)
# legend.text = element_text(size = 6), # Set legend text size
# legend.title = element_text(size = 6))
#
# fileName <- here(figDir, "..", "GO", "GO_groups_ps")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
# distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
#
p <- ggplot(temp, aes(x = psOver, y = mean_diff_score)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3)
fileName <- paste0("diffScore_barplot_psGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
# distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
#
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)
p <- ggplot(tempDown, aes(x = psOver, y = log2FoldChange)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_psGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
#
p <- ggplot(tempUp, aes(x = psOver, y = log2FoldChange)) + geom_violin(aes(fill = psOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = psOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_psGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
peOver = ifelse(gene %in% peOver4, "peOver4",
ifelse(gene %in% peOver3, "peOver3",
ifelse(gene %in% peOver2, "peOver2",
ifelse(gene %in% peOver1, "peOver1",
ifelse(gene %in% peOver0, "peOver0", NA)))))) %>%
drop_na(peOver)
# GOfigDir <- here(figDir, "../GO")
# getGO("peOver4", GOfigDir, psOver4)
# getGO("peOver3", GOfigDir, psOver3)
# getGO("peOver2", GOfigDir, psOver2)
# getGO("peOver1", GOfigDir, psOver1)
# getGO("peOver0", GOfigDir, psOver0)
#
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
#
#
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
#
#
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
# "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165",
# "GO:0072001", "GO:0007517", "GO:0048705",
# "GO:0040029", "GO:0010165"))
#
# data <- bind_rows(bind_rows(bind_rows(subset0, subset1), subset2), subset4) %>%
# dplyr::filter(ID %in% GOlist)
#
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) +
# geom_point() + theme_bw() +
# scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
# scale_size_continuous(range = c(0, 3)) +
# labs(x = NULL, y = NULL) +
# theme(axis.text = element_text(size = 6), # Set axis text size
# axis.title = element_text(size = 6), # Set axis title size (if not removed)
# legend.text = element_text(size = 6), # Set legend text size
# legend.title = element_text(size = 6))
#
# fileName <- here(figDir, "..", "GO", "GO_groups_pe")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
# distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
p <- ggplot(temp, aes(x = peOver, y = mean_diff_score)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ",convPvalue( ps14), "\n",
# "ps04: ",convPvalue( ps04), "\n"),
# color = "black", hjust = 0, size = 3)
fileName <- paste0("diffScore_barplot_peGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
# distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)
p <- ggplot(tempDown, aes(x = peOver, y = log2FoldChange)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_peGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
#
p <- ggplot(tempUp, aes(x = peOver, y = log2FoldChange)) + geom_violin(aes(fill = peOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = peOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_peGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# temp2 contains genes from group 1, 2, 5, 8 and loop counts
#calculating diff score and log2fc distribution based on p-n numbers
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_A485_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_A485_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
ppOver = ifelse(gene %in% ppOver4, "ppOver4",
ifelse(gene %in% ppOver3, "ppOver3",
ifelse(gene %in% ppOver2, "ppOver2",
ifelse(gene %in% ppOver1, "ppOver1",
ifelse(gene %in% ppOver0, "ppOver0", NA)))))) %>%
drop_na(ppOver)
# GOfigDir <- here(figDir, "../GO")
# getGO("ppOver4", GOfigDir, psOver4)
# getGO("ppOver3", GOfigDir, psOver3)
# getGO("ppOver2", GOfigDir, psOver2)
# getGO("ppOver1", GOfigDir, psOver1)
# getGO("ppOver0", GOfigDir, psOver0)
#
# #####################
# GO0.df <- as.data.frame(enrichGO(gene = psOver0, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO1.df <- as.data.frame(enrichGO(gene = psOver1, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO2.df <- as.data.frame(enrichGO(gene = psOver2, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO3.df <- as.data.frame(enrichGO(gene = psOver3, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
# GO4.df <- as.data.frame(enrichGO(gene = psOver4, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP"))
#
#
# subset0 <- GO0.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver0") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset1 <- GO1.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver1") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset2 <- GO2.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver2") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset3 <- GO3.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver3") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
# subset4 <- GO4.df %>% dplyr::select(ID, Description, GeneRatio, p.adjust) %>% dplyr::mutate(group = "psOver4") %>%
# dplyr::mutate(
# gr = sapply(GeneRatio, function(x) {
# # Split the string by "/"
# parts <- unlist(strsplit(x, "/"))
# # Convert to numeric and perform the division
# as.numeric(parts[1]) / as.numeric(parts[2])
# })
# ) %>% dplyr::arrange(desc(gr))
#
#
# GOlist <- factor(c("GO:0006397", "GO:0008380", "GO:0022613", "GO:0034470",
# "GO:0016055", "GO:0007389", "GO:0048562", "GO:0045165",
# "GO:0072001", "GO:0007517", "GO:0048705", "GO:0003002", "GO:0009411"))
#
# data <- bind_rows(bind_rows(subset0, subset1), subset3) %>%
# dplyr::filter(ID %in% GOlist)
#
# p <- ggplot(data, aes(x = group, y = Description, color = p.adjust, size = gr)) +
# geom_point() + theme_bw() +
# scale_color_gradient(low = "red", high = "blue", limits = c(0, 0.05)) +
# scale_size_continuous(range = c(0, 3)) +
# labs(x = NULL, y = NULL) +
# theme(axis.text = element_text(size = 6), # Set axis text size
# axis.title = element_text(size = 6), # Set axis title size (if not removed)
# legend.text = element_text(size = 6), # Set legend text size
# legend.title = element_text(size = 6))
#
# fileName <- here(figDir, "..", "GO", "GO_groups_pp")
# height = 2
# width = 3.4
# svglite(paste0(fileName, ".svg"), height = height, width = width)
# print(p)
# dev.off()
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(psOver ==group1) )$mean_diff_score
# distance2 <- (data %>% dplyr::filter(psOver ==group2) )$mean_diff_score
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
# ps01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(temp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(temp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
p <- ggplot(temp, aes(x = ppOver, y = mean_diff_score)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3)
fileName <- paste0("diffScore_barplot_ppGroup_A485_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(psOver ==group1) )$log2FoldChange
# distance2 <- (data %>% dplyr::filter(psOver ==group2) )$log2FoldChange
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
tempDown <- temp %>% dplyr::filter(log2FoldChange < 0)
#
# ps01 <- round(getPvalWilcox(tempDown, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempDown, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempDown, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempDown, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempDown, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempDown, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempDown, "psOver0", "psOver4"), 5)
p <- ggplot(tempDown, aes(x = ppOver, y = log2FoldChange)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = -0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(-1, 0))
fileName <- paste0("log2FC_barplot_ppGroup_A485_vs_DMSO_down")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
tempUp <- temp %>% dplyr::filter(log2FoldChange > 0)
#
# ps01 <- round(getPvalWilcox(tempUp, "psOver0", "psOver1"), 5)
# ps12 <- round(getPvalWilcox(tempUp, "psOver1", "psOver2"), 5)
# ps23 <- round(getPvalWilcox(tempUp, "psOver2", "psOver3"), 5)
# ps34 <- round(getPvalWilcox(tempUp, "psOver3", "psOver4"), 5)
# ps24 <- round(getPvalWilcox(tempUp, "psOver2", "psOver4"), 5)
# ps14 <- round(getPvalWilcox(tempUp, "psOver1", "psOver4"), 5)
# ps04 <- round(getPvalWilcox(tempUp, "psOver0", "psOver4"), 5)
#
#
p <- ggplot(tempUp, aes(x = ppOver, y = log2FoldChange)) + geom_violin(aes(fill = ppOver), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) + theme_classic() +
stat_summary(aes(group = ppOver), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
geom_hline(yintercept = 0)
# annotate("text", x = 1, y = 0.5, label = paste0("ps01: ", convPvalue(ps01), "\n",
# "ps12: ", convPvalue(ps12), "\n",
# "ps23: ", convPvalue(ps23), "\n",
# "ps34: ", convPvalue(ps34), "\n",
# "ps24: ", convPvalue(ps24), "\n",
# "ps14: ", convPvalue(ps14), "\n",
# "ps04: ", convPvalue(ps04), "\n"),
# color = "black", hjust = 0, size = 3) + coord_cartesian(ylim = c(0, 1))
fileName <- paste0("log2FC_barplot_ppGroup_A485_vs_DMSO_up")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# IMPORTING GENE ANNO DATA FOR P-N LOOPS
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X")) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27, 28))
data <- fread(here(consensusDir, paste0(name, ".tsv"))) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27)) %>%
dplyr::mutate(binSize = end1 - start1,
adj_start1 = ifelse(binSize == 5000, start1,
ifelse(binSize == 10000, start1,
start1 + 10000)),
adj_end1 = adj_start1 + 5000,
adj_start2 = ifelse(binSize == 5000, start2,
ifelse(binSize == 10000, start2,
start2 + 10000)),
adj_end2 = adj_start2 + 5000,
anchor1 = paste(chrom1, adj_start1, adj_end1, sep = "_"),
anchor2 = paste(chrom2, adj_start2, adj_end2, sep = "_"))
# data <- fread(here(consensusDir, paste0(name, ".tsv"))) %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 13, 22, 23, 24, 25, 27)) %>%
# dplyr::mutate(binSize = end1 - start1,
# adj_start1 = ifelse(binSize == 5000, start1,
# ifelse(binSize == 10000, start1+5000,
# start1 + 10000)),
# adj_end1 = adj_start1 + 5000,
# adj_start2 = ifelse(binSize == 5000, start2,
# ifelse(binSize == 10000, start2 + 5000,
# start2 + 10000)),
# adj_end2 = adj_start2 + 5000,
# anchor1 = paste(chrom1, adj_start1, adj_end1, sep = "_"),
# anchor2 = paste(chrom2, adj_start2, adj_end2, sep = "_"))
data.reg <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "P-S", "P-X", "E-E", "E-S", "E-X"))
## Graph
#install.packages("igraph")
library(igraph)
# To be able to construct a graph, each anchor need to have same resolution.
# Shink the size to 10 kb
# If the anchor is 25kb, choose the middle 5kb bin
# If the anchor is 10kb, choose the left 5kb bin
graph <- graph_from_data_frame(data.reg %>% dplyr::select(anchor1, anchor2))
components <- components(graph)
data.reg$hub <- components$membership[data.reg$anchor1]
temp.bedpe <- data.reg %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj.bedpe")),
col.names = FALSE,row.names = FALSE, sep = "\t")
hubNum <- data.reg %>% group_by(hub) %>% summarise(count = n())
# Hub size distribution
p <- ggplot(hubNum, aes(x = count)) + geom_histogram(binwidth = 1) + theme_classic() +
scale_y_log10()
svglite(here(figDir, "hub_size_distribution.svg"), height = 2, width = 2)
print(p)
dev.off()
data.reg <- data.reg %>% dplyr::left_join(hubNum, by = "hub")
data.reg <- data.reg %>% dplyr::mutate(peakID = paste(chrom1, start1, start2, sep = "_"))
# Exporting regulatory loops with specific hub size
temp.bedpe <- data.reg %>% dplyr::filter(count >= 3) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub3.bedpe")),
col.names = FALSE,row.names = FALSE, sep = "\t")
temp.bedpe <- data.reg %>% dplyr::filter(count >= 5) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub5.bedpe")),
col.names = FALSE,row.names = FALSE, sep = "\t")
temp.bedpe <- data.reg %>% dplyr::filter(count >= 10) %>% dplyr::select(chrom1, adj_start1, adj_end1, chrom2, adj_start2, adj_end2)
fwrite(temp.bedpe, here(consensusDir, paste0(name, "_regulatory_5kbAdj_hub10.bedpe")),
col.names = FALSE,row.names = FALSE, sep = "\t")
# Adding gene annotation to hubs
genePeakPair <- geneAnnoData %>% dplyr::mutate(peakID = paste(chrom1, start1, start2, sep = "_")) %>% dplyr::select(peakID, gene)
data.reg <- data.reg %>% dplyr::left_join(genePeakPair, by = "peakID")
# During unnesting, E-N loops without gene annotated are removed
data.reg <- data.reg %>% unnest(gene) %>% dplyr::mutate(
group = ifelse(gene %in% gene.group1, "group1",
ifelse(gene %in% gene.group2, "group2",
ifelse(gene %in% gene.group3, "group3",
ifelse(gene %in% gene.group4, "group4",
ifelse(gene %in% gene.group5, "group5",
ifelse(gene %in% gene.group6, "group6",
ifelse(gene %in% gene.group7, "group7",
ifelse(gene %in% gene.group8, "group8",
ifelse(gene %in% gene.group9, "group9", NA)))))))))
)
temp <- data.reg %>% dplyr::select(gene, hub, count, group) %>%
group_by(gene) %>% slice_max(count, with_ties = FALSE) %>%
dplyr::filter(group %in% c("group1", "group2", "group5", "group8"))
# TEMP START
# TEMP START
geneList.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))
geneList.group1.temp <- geneList.group1 %>% dplyr::left_join(temp, by = c("gene"))
# TEMP END
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$count
distance2 <- (data %>% dplyr::filter(group ==group2) )$count
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv12 <- round(getPvalWilcox(tempSum,"group1", "group2"), 5)
pv15 <- round(getPvalWilcox(tempSum,"group1", "group5"), 5)
pv18 <- round(getPvalWilcox(tempSum,"group1", "group8"), 5)
pv25 <- round(getPvalWilcox(tempSum,"group2", "group5"), 5)
pv28 <- round(getPvalWilcox(tempSum,"group2", "group8"), 5)
pv58 <- round(getPvalWilcox(tempSum,"group5", "group8"), 5)
p <- ggplot(temp, aes(x = group, y = count, fill = group)) +
geom_boxplot(width = 0.5, outlier.shape = NA) + theme_classic() +
scale_y_continuous(breaks = seq(0, 30, by = 5), limits = c(0, 30)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 3, label = paste0("pv12: ", pv12, "\n",
"pv15: ", pv15, "\n",
"pv18: ", pv18, "\n",
"pv25: ", pv25, "\n",
"pv28: ", pv28, "\n",
"pv58: ", pv58, "\n"),
color = "black", hjust = 0, size = 1) + theme(legend.position = "none")
fileName <- paste0("countHub_barplot_diffGroup_dTAG_vs_DMSO")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# Import insulation score calculated with python
resultDir <- here("../../result")
insScore.DMSO <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1DMSO.tsv")) %>%
dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.DMSO) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.dTAG <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1dTAG.tsv")) %>%
dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.dTAG) <- c("chrom", "start", "end", "insulationScore", "binID")
#insScore <- full_join(insScore.DMSO, insScore.dTAG, by = c("binID")) %>%
# dplyr::select(binID, chrom.x, start.x, end.x, insulationScore.x, insulationScore.y)
#colnames(insScore) <- c("binID", "chr", "start", "end", "insulation_score_DMSO", "insulation_scoare_dTAG")
#View(insScore)
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")
getInsulationScore <- function(chr, TSS, insScore.tb){
temp.tb <- insScore.tb %>% dplyr::filter(chrom == chr, start < TSS, end > TSS)
out <- temp.tb$insulationScore
if(length(out) < 1){
return(NA)
}else{
return(temp.tb$insulationScore)
}
}
gene.insScore.all <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
log2_insScore_DMSO = getInsulationScore(chr, TSS, insScore.DMSO),
insScore_DMSO = 2^log2_insScore_DMSO,
log2_insScore_dTAG = getInsulationScore(chr, TSS, insScore.dTAG),
insScore_dTAG = 2^log2_insScore_dTAG,
diff_insScore = insScore_dTAG - insScore_DMSO)
# Importing TAD boundaries
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
center = (start + end)/2)
findClosestTADBoundary <- function(chrom, TSS, tad_boundary){
temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
dplyr::mutate(distance = abs(center - TSS)) %>%
slice_min(distance)
if(nrow(temp) == 1){
return(temp$center)
}else{
return(NA)
}
}
gene.insScore.all <- gene.insScore.all %>% rowwise() %>%
dplyr::mutate(closestBoundary = findClosestTADBoundary(chr, TSS, tad_boundary),
log2_boundaryInsScore_DMSO = getInsulationScore(chr, closestBoundary, insScore.DMSO),
boundaryInsScore_DMSO = 2^log2_boundaryInsScore_DMSO,
log2_boundaryInsScore_dTAG = getInsulationScore(chr, closestBoundary, insScore.dTAG),
boundaryInsScore_dTAG = 2^log2_boundaryInsScore_dTAG,
diff_boundaryInsScore = boundaryInsScore_dTAG - boundaryInsScore_DMSO)
saveRDS(gene.insScore.all, here(resultDir, "gene.insScore.all.rds"))
gene.insScore.all <- readRDS(here(resultDir, "gene.insScore.all.rds"))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2", NA))) %>%
dplyr::filter(!is.na(group))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$score
distance2 <- (data %>% dplyr::filter(group ==group2) )$score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_insScore <- function(temp.tb, note, ymin = -1.5, ymax = 0){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p <- ggplot(temp.tb, aes(x = ".", fill = group, y = -score)) +
labs(x = NULL, y = "- Insulation score") + # Remove x-axis title
scale_fill_manual(values = rev((c("#777777", "#F28E2C")))) +
introdataviz::geom_split_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square",
alpha = .4) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) + theme_classic() +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
#
# geom_violin(aes(fill = group), color = "black", linewidth = lineThick*mmToLineUnit, lineend = "square", show.legend = FALSE) +
# geom_boxplot(width = 0.1, color = "black", linewidth = lineThick*mmToLineUnit, lineend = "square",
# outlier.size = 1, outlier.stroke = NA) +
# stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 1, fill = "red", color = "black") +
geom_hline(yintercept = 0, linewidth = lineThick*mmToLineUnit) +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+ guides(
fill = guide_legend(
keywidth = 0.2, # Adjust the width of the legend keys
keyheight = 0.2 # Adjust the height of the legend keys
))
fileName <- paste0("insulation_score_binaryGroup_", note)
width <- panelSize(1.55)*mmToInch
height <- panelSize(1.2)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_binaryGroup", ymin = -1.1, ymax = -0.5)
# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_binaryGroup", ymin = -1.1, ymax = -0.5)
# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_binaryGroup", ymin = -0.3, ymax = 0.1)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene
gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2",
ifelse(ensembl %in% group5, "group5",
ifelse(ensembl %in% group8, "group8", NA))))) %>%
dplyr::filter(!is.na(group))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$score
distance2 <- (data %>% dplyr::filter(group ==group2) )$score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("insulation_score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO")
# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG")
# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff", ymin = -0.5, ymax = 1)
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene
gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% pnOver8, "p8",
ifelse(ensembl %in% pnOver6, "p6",
ifelse(ensembl %in% pnOver4, "p4",
ifelse(ensembl %in% pnOver2, "p2",
ifelse(ensembl %in% pnOver0, "p0", NA)))))) %>%
dplyr::filter(!is.na(group))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$score
distance2 <- (data %>% dplyr::filter(group ==group2) )$score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
p02 <- round(getPvalWilcox(temp.tb, "p0", "p2"), 5)
p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
p46 <- round(getPvalWilcox(temp.tb, "p4", "p6"), 5)
p68 <- round(getPvalWilcox(temp.tb, "p6", "p8"), 5)
p48 <- round(getPvalWilcox(temp.tb, "p4", "p8"), 5)
p28 <- round(getPvalWilcox(temp.tb, "p2", "p8"), 5)
p08 <- round(getPvalWilcox(temp.tb, "p0", "p8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p02: ", convPvalue(p02), "\n",
"p24: ", convPvalue(p24), "\n",
"p46: ", convPvalue(p46), "\n",
"p68: ", convPvalue(p68), "\n",
"p48: ", convPvalue(p48), "\n",
"p28: ",convPvalue( p28), "\n",
"p08: ",convPvalue( p08), "\n"),
color = "black", hjust = 0, size = 3) +
scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
fileName <- paste0("insulation_score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-N")
# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-N")
# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-N", ymin = -0.5, ymax = 1)
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% psOver4, "p4",
ifelse(ensembl %in% psOver3, "p3",
ifelse(ensembl %in% psOver2, "p2",
ifelse(ensembl %in% psOver1, "p1",
ifelse(ensembl %in% psOver0, "p0", NA)))))) %>%
dplyr::filter(!is.na(group))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$score
distance2 <- (data %>% dplyr::filter(group ==group2) )$score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
"p12: ", convPvalue(p12), "\n",
"p23: ", convPvalue(p23), "\n",
"p34: ", convPvalue(p34), "\n",
"p24: ", convPvalue(p24), "\n",
"p14: ",convPvalue( p14), "\n",
"p04: ",convPvalue( p04), "\n"),
color = "black", hjust = 0, size = 3) +
scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
fileName <- paste0("insulation_score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-S")
# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-S")
# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-S", ymin = -0.5, ymax = 1)
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% peOver4, "p4",
ifelse(ensembl %in% peOver3, "p3",
ifelse(ensembl %in% peOver2, "p2",
ifelse(ensembl %in% peOver1, "p1",
ifelse(ensembl %in% peOver0, "p0", NA)))))) %>%
dplyr::filter(!is.na(group))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$score
distance2 <- (data %>% dplyr::filter(group ==group2) )$score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
"p12: ", convPvalue(p12), "\n",
"p23: ", convPvalue(p23), "\n",
"p34: ", convPvalue(p34), "\n",
"p24: ", convPvalue(p24), "\n",
"p14: ",convPvalue( p14), "\n",
"p04: ",convPvalue( p04), "\n"),
color = "black", hjust = 0, size = 3) +
scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
fileName <- paste0("insulation_score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-E")
# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-E")
# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-E", ymin = -0.5, ymax = 1)
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
gene.insScore <- gene.insScore.all %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% ppOver4, "p4",
ifelse(ensembl %in% ppOver3, "p3",
ifelse(ensembl %in% ppOver2, "p2",
ifelse(ensembl %in% ppOver1, "p1",
ifelse(ensembl %in% ppOver0, "p0", NA)))))) %>%
dplyr::filter(!is.na(group))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$score
distance2 <- (data %>% dplyr::filter(group ==group2) )$score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_insScore <- function(temp.tb, note, ymin = 0, ymax = 1.5){
p01 <- round(getPvalWilcox(temp.tb, "p0", "p1"), 5)
p12 <- round(getPvalWilcox(temp.tb, "p1", "p2"), 5)
p23 <- round(getPvalWilcox(temp.tb, "p2", "p3"), 5)
p34 <- round(getPvalWilcox(temp.tb, "p3", "p4"), 5)
p24 <- round(getPvalWilcox(temp.tb, "p2", "p4"), 5)
p14 <- round(getPvalWilcox(temp.tb, "p1", "p4"), 5)
p04 <- round(getPvalWilcox(temp.tb, "p0", "p4"), 5)
p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p01: ", convPvalue(p01), "\n",
"p12: ", convPvalue(p12), "\n",
"p23: ", convPvalue(p23), "\n",
"p34: ", convPvalue(p34), "\n",
"p24: ", convPvalue(p24), "\n",
"p14: ",convPvalue( p14), "\n",
"p04: ",convPvalue( p04), "\n"),
color = "black", hjust = 0, size = 3) +
scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
fileName <- paste0("insulation_score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# DMSO
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_DMSO)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_DMSO_P-P")
# dTAG
temp.tb <- gene.insScore %>% dplyr::select(group, boundaryInsScore_dTAG)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_dTAG_P-P")
# diff
temp.tb <- gene.insScore %>% dplyr::select(group, diff_boundaryInsScore)
colnames(temp.tb) <- c("group", "score")
plot_insScore(temp.tb, "nearestBoundary_diff_P-P", ymin = -0.5, ymax = 1)
refDir <- here("../..", "reference")
# Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
center = (start + end)/2)
# Importing TAD
tad <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_G1DMSO.bedpe"))
colnames(tad) <- c("chr1", "start1", "end1", "chr2", "start2", "end2")
tad <- tad %>% dplyr::mutate(tadID = paste(chr1, start1, end1, sep = "_"),
tadSize = end1 - start1)
# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")
# Functions
findDistanceToTAD <- function(chrom, TSS, tad_boundary){
temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
dplyr::mutate(distance = abs(center - TSS)) %>% slice_min(distance)
if(nrow(temp) == 1){
return(temp$distance)
}else{
return(NA)
}
}
findItsTAD <- function(chrom, TSS, tad){
temp <- tad %>% dplyr::filter(chr1 == chrom,
start1 < TSS,
end1 > TSS)
if(nrow(temp) == 1){
return(temp$tadID)
}else{
return(NA)
}
}
findTADSize <-function(chrom, TSS, tad){
temp <- tad %>% dplyr::filter(chr1 == chrom,
start1 < TSS,
end1 > TSS)
if(nrow(temp) == 1){
return(temp$tadSize)
}else{
return(NA)
}
}
gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
dplyr::mutate(distance = findDistanceToTAD(chr, TSS, tad_boundary),
TAD = findItsTAD(chr, TSS, tad),
TADsize = findTADSize(chr, TSS, tad))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2", NA))) %>%
dplyr::filter(!is.na(group))
## Plot distance
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$distance
distance2 <- (data %>% dplyr::filter(group == group2) )$distance
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_TADdistance <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p <- ggplot(temp.tb, aes(x = ".", fill = group, y = distance)) +
# Set axis labels (no x-axis title)
scale_fill_manual(values = ((c("#777777", "#F28E2C")))) +
labs(x = NULL, y = "Distance from TAD boundary") +
# Violin plot with black outline, customized line width and end
introdataviz::geom_split_violin(
color = "black", alpha = 0.4,
linewidth = lineMedium * mmToLineUnit, lineend = "square"
) +
# Box plot with customized line width, square end, and outlier size
geom_boxplot(
width = 0.3, color = "black", alpha = 0.6,
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE
) +
# Mean point in each group
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
# Horizontal line at y = 0
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
# Coordinate limits and custom y-axis labels
coord_cartesian(ylim = c(ymin, ymax)) +
scale_y_continuous(labels = label_kb_mb) +
# Annotate p-value text
annotate(
"text", x = 1, y = ymin + 1,
label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3
) +
# Theme customization
theme_classic() +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)) +
guides(
fill = guide_legend(
keywidth = 0.2, # Adjust the width of the legend keys
keyheight = 0.2 # Adjust the height of the legend keys
)
)
fileName <- paste0("distance_to_boundary_", note)
width <- panelSize(1.55)*mmToInch
height <- panelSize(1.2)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_TADdistance(gene.TSS.tb.plot, "group_binayGroup", ymax = 1000000)
## Plot TAD size
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$TADsize
distance2 <- (data %>% dplyr::filter(group == group2) )$TADsize
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_TADsize <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p <- ggplot(temp.tb, aes(x = ".", fill = group, y = TADsize)) +
# Set axis labels (no x-axis title)
labs(x = NULL, y = "Size of TAD") +
scale_fill_manual(values = ((c("#777777", "#F28E2C")))) +
# Violin plot with black outline, customized line width and end
introdataviz::geom_split_violin(
color = "black",
linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4
) +
# Box plot with customized line width, square end, and outlier size
geom_boxplot(
width = 0.3, color = "black",
linewidth = lineMedium* mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE
) +
# Mean point in each group
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
# Horizontal line at y = 0
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
# Coordinate limits and custom y-axis labels
coord_cartesian(ylim = c(ymin, ymax)) +
scale_y_continuous(labels = label_kb_mb) +
# Annotate p-value text
annotate(
"text", x = 1, y = ymin + 1,
label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3
) +
# Theme customization
theme_classic() +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) + guides(
fill = guide_legend(
keywidth = 0.2, # Adjust the width of the legend keys
keyheight = 0.2 # Adjust the height of the legend keys
))
fileName <- paste0("size_of_boundary_", note)
width <- panelSize(1.55)*mmToInch
height <- panelSize(1.2)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_TADsize(gene.TSS.tb.plot, "group_binaryGroup", ymax = 3e6)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene
gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2",
ifelse(ensembl %in% group5, "group5",
ifelse(ensembl %in% group8, "group8", NA))))) %>%
dplyr::filter(!is.na(group))
## Plot distance
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$distance
distance2 <- (data %>% dplyr::filter(group == group2) )$distance
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_TADdistance <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + scale_y_continuous(labels = label_kb_mb) +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("distance_to_boundary_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_TADdistance(gene.TSS.tb.plot, "group")
## Plot TAD size
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$TADsize
distance2 <- (data %>% dplyr::filter(group == group2) )$TADsize
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_TADsize <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = TADsize)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + scale_y_continuous(labels = label_kb_mb) +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("size_of_boundary_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_TADsize(gene.TSS.tb.plot, "group", ymax = 6e6)
group1 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"))$gene
group3 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"))$gene
group4 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"))$gene
group5 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"))$gene
group6 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"))$gene
group7 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"))$gene
group8 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"))$gene
group9 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"))$gene
gene.TSS.tb.plot <- gene.TSS.tb %>%
rowwise() %>%
dplyr::mutate(
group = dplyr::case_when(
ensembl %in% group1 ~ "group1",
ensembl %in% group2 ~ "group2",
ensembl %in% group3 ~ "group3",
ensembl %in% group4 ~ "group4",
ensembl %in% group5 ~ "group5",
ensembl %in% group6 ~ "group6",
ensembl %in% group7 ~ "group7",
ensembl %in% group8 ~ "group8",
ensembl %in% group9 ~ "group9",
TRUE ~ NA_character_
)
) %>%
dplyr::filter(!is.na(group))
## Plot distance
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(group == group1) )$distance
# distance2 <- (data %>% dplyr::filter(group == group2) )$distance
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
plot_TADdistance <- function(temp.tb, note, ymin = 0, ymax = 2000000){
# p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
# p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
# p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
# p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
# p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
# p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
#
p <- ggplot(temp.tb, aes(x = group, y = distance)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + scale_y_continuous(labels = label_kb_mb)
# annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
# "p15: ", convPvalue(p15), "\n",
# "p18: ", convPvalue(p18), "\n",
# "p25: ", convPvalue(p25), "\n",
# "p28: ", convPvalue(p28), "\n",
# "p58: ",convPvalue( p58), "\n"),
# color = "black", hjust = 0, size = 3)
fileName <- paste0("distance_to_boundary_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_TADdistance(gene.TSS.tb.plot, "group_A485")
## Plot TAD size
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(group == group1) )$TADsize
# distance2 <- (data %>% dplyr::filter(group == group2) )$TADsize
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
#
plot_TADsize <- function(temp.tb, note, ymin = 0, ymax = 2000000){
# p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
# p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
# p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
# p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
# p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
# p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = TADsize)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + scale_y_continuous(labels = label_kb_mb)
# annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
# "p15: ", convPvalue(p15), "\n",
# "p18: ", convPvalue(p18), "\n",
# "p25: ", convPvalue(p25), "\n",
# "p28: ", convPvalue(p28), "\n",
# "p58: ",convPvalue( p58), "\n"),
# color = "black", hjust = 0, size = 3)
fileName <- paste0("size_of_boundary_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_TADsize(gene.TSS.tb.plot, "group_A485", ymax = 6e6)
refDir <- here("../..", "reference")
# Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
start = start -175000/2,
end = end + 175000/2,
center = (start + end)/2)
# Importing RAD21 bigwig track
bw.RAD21 <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm_bin50bp.bw"))
# Create GRanges object for TAD boundaries
tad_ranges <- makeGRangesFromDataFrame(tad_boundary, keep.extra.columns = TRUE)
# Find overlaps between all TAD boundaries and RAD21 bigwig data
overlaps <- findOverlaps(tad_ranges, bw.RAD21)
# Extract overlapping regions and scores from the BigWig
overlapping_bw <- bw.RAD21[subjectHits(overlaps)]
overlapping_tads <- tad_ranges[queryHits(overlaps)]
# Aggregate scores by TAD boundary regions
scores <- as_tibble(overlapping_bw) %>%
dplyr::mutate(tad_id = tad_ranges[queryHits(overlaps)]$tad_id) %>%
group_by(tad_id) %>%
summarise(rad21Score = sum(score))
tad_boundary <- tad_boundary %>%
left_join(scores, by = "tad_id")
# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")
# Find nearest TAD boundary
findClosestTADBoundaryID <- function(chrom, TSS, tad_boundary){
temp <- tad_boundary %>% dplyr::filter(chr == chrom) %>%
dplyr::mutate(distance = abs(center - TSS)) %>%
slice_min(distance)
if(nrow(temp) == 1){
return(temp$tad_id)
}else{
return(NA)
}
}
gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
dplyr::mutate(closestBoundary = findClosestTADBoundaryID(chr, TSS, tad_boundary))
temp <- tad_boundary %>% dplyr::select(tad_id, rad21Score)
gene.TSS.tb <- gene.TSS.tb %>% left_join(temp, by = c("closestBoundary" = "tad_id") )
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene
gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2",
ifelse(ensembl %in% group5, "group5",
ifelse(ensembl %in% group8, "group8", NA))))) %>%
dplyr::filter(!is.na(group))
## Plot distance
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$rad21Score
distance2 <- (data %>% dplyr::filter(group == group2) )$rad21Score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_rad21ScoreAtBoundary <- function(temp.tb, note){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = rad21Score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = 20 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("rad21Score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_rad21ScoreAtBoundary(gene.TSS.tb.plot, "group")
### IMPORTING REQUIRED DATA
# Importing TAD
tad <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_G1DMSO.bedpe"))
colnames(tad) <- c("chr1", "start1", "end1", "chr2", "start2", "end2")
tad <- tad %>% dplyr::mutate(tadID = paste(chr1, start1, end1, sep = "_"))
# Importing gene
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V6, V5, V1, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "gene", "chr", "TSS")
# Importing enhancer
peak.H3K27ac <- as_tibble(importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))) %>%
dplyr::mutate(center = (start + end)/2)
### FUNCTION FOR COUNTING
findItsTAD <- function(chrom, coordinate, tad){
temp <- tad %>% dplyr::filter(chr1 == chrom,
start1 < coordinate,
end1 > coordinate)
if(nrow(temp) == 1){
return(temp$tadID)
}else{
return(NA)
}
}
### Counting each features for TAD
### Counting gene
geneCountPerTAD <- gene.TSS.tb %>% rowwise() %>%
dplyr::mutate(TAD = findItsTAD(chr, TSS, tad)) %>%
drop_na() %>%
group_by(TAD) %>%
summarize(count = n())
### Counting enhancer
enhCountPerTAD <- peak.H3K27ac %>% rowwise() %>%
dplyr::mutate(TAD = findItsTAD(seqnames, center, tad)) %>%
drop_na() %>%
group_by(TAD) %>%
summarize(count = n())
### Get TAD sizez
tad.db <- tad %>% left_join(geneCountPerTAD, by = c("tadID" = "TAD"), ) %>%
left_join(enhCountPerTAD, by = c("tadID" = "TAD")) %>%
dplyr::select(c(1, 2, 3, 7, 8, 9)) %>%
mutate_all(~replace(., is.na(.), 0))
colnames(tad.db) <- c("chr", "start", "end", "tadID", "geneCount", "enhCount")
tad.db <- tad.db %>% dplyr::mutate(
tadSize = end - start,
geneDensity = geneCount/tadSize * 100e3,
enhDensity = enhCount/tadSize* 100e3,
regDensity = (geneCount + enhCount)/tadSize* 100e3
)
### Assign TAD and information to gene
gene.TSS.tb <- gene.TSS.tb %>% rowwise() %>%
dplyr::mutate(TAD = findItsTAD(chr, TSS, tad))
#gene.TSS.tb <- gene.TSS.tb %>% dplyr::filter(!is.na(TAD))
temp <- tad.db %>% dplyr::select(-c(1, 2, 3))
gene.TSS.tb <- gene.TSS.tb %>% dplyr::left_join(temp, by = c("TAD" = "tadID"))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2", NA))) %>%
dplyr::filter(!is.na(group))
## geneCount
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$geneCount
distance2 <- (data %>% dplyr::filter(group == group2) )$geneCount
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_geneCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p <- ggplot(temp.tb, aes(x = group, y = geneCount)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3)
fileName <- paste0("geneCount_", note)
height <- 3
width <- 1.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_geneCount(gene.TSS.tb.plot, "group_binaryGroup", ymax = 200)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene
gene.TSS.tb.plot <- gene.TSS.tb %>% rowwise() %>% dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2",
ifelse(ensembl %in% group5, "group5",
ifelse(ensembl %in% group8, "group8", NA))))) %>%
dplyr::filter(!is.na(group))
## geneCount
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$geneCount
distance2 <- (data %>% dplyr::filter(group == group2) )$geneCount
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_geneCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = geneCount)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("geneCount_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_geneCount(gene.TSS.tb.plot, "group", ymax = 200)
## enhCount
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$enhCount
distance2 <- (data %>% dplyr::filter(group == group2) )$enhCount
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_enhCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p <- ggplot(temp.tb, aes(x = group, y = enhCount)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3)
fileName <- paste0("enhCount_", note)
height <- 3
width <- 1.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_enhCount(gene.TSS.tb.plot, "group_binaryGroup", ymax = 200)
## enhCount
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$enhCount
distance2 <- (data %>% dplyr::filter(group == group2) )$enhCount
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_enhCount <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = enhCount)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("enhCount_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_enhCount(gene.TSS.tb.plot, "group", ymax = 200)
## geneDensity
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$geneDensity
distance2 <- (data %>% dplyr::filter(group == group2) )$geneDensity
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_geneDensity <- function(temp.tb, note, ymin = 0, ymax = 10){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p <- ggplot(temp.tb, aes(x = ".", fill = group, y = geneDensity)) +
# Set axis labels (no x-axis title)
labs(x = NULL, y = "Gene density within TAD") +
scale_fill_manual(values = (rev(c("#777777","#F28E2C")))) +
# Violin plot with black outline, customized line width and end
introdataviz::geom_split_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square",
alpha = .4
) +
# Box plot with customized line width, square end, and outlier size
geom_boxplot(
width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE
) +
# Mean point in each group
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
# Horizontal line at y = 0
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
# Coordinate limits and custom y-axis labels
coord_cartesian(ylim = c(ymin, ymax)) +
# Annotate p-value text
annotate(
"text", x = 1, y = ymin + 1,
label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3
) +
# Theme customization
theme_classic() +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
guides(
fill = guide_legend(
keywidth = 0.2, # Adjust the width of the legend keys
keyheight = 0.2 # Adjust the height of the legend keys
))
fileName <- paste0("geneDensity_", note)
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.2)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_geneDensity(gene.TSS.tb.plot, "group_binaryGroup", ymax = 8)
## geneDensity
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$geneDensity
distance2 <- (data %>% dplyr::filter(group == group2) )$geneDensity
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_geneDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = geneDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("geneDensity_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_geneDensity(gene.TSS.tb.plot, "group", ymax = 10)
## enhDensity
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$enhDensity
distance2 <- (data %>% dplyr::filter(group == group2) )$enhDensity
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_enhDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p <- ggplot(temp.tb, aes(x = group, y = enhDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3)
fileName <- paste0("enhDensity_", note)
height <- 3
width <- 1.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_enhDensity(gene.TSS.tb.plot, "group_binaryGroup", ymax = 10)
## enhDensity
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$enhDensity
distance2 <- (data %>% dplyr::filter(group == group2) )$enhDensity
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_enhDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = enhDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("enhDensity_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_enhDensity(gene.TSS.tb.plot, "group", ymax = 10)
## regDensity
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$regDensity
distance2 <- (data %>% dplyr::filter(group == group2) )$regDensity
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
plot_regDensity <- function(temp.tb, note, ymin = 0, ymax = 2000000){
p12 <- round(getPvalWilcox(temp.tb, "group1", "group2"), 5)
p15 <- round(getPvalWilcox(temp.tb, "group1", "group5"), 5)
p18 <- round(getPvalWilcox(temp.tb, "group1", "group8"), 5)
p25 <- round(getPvalWilcox(temp.tb, "group2", "group5"), 5)
p28 <- round(getPvalWilcox(temp.tb, "group2", "group8"), 5)
p58 <- round(getPvalWilcox(temp.tb, "group5", "group8"), 5)
p <- ggplot(temp.tb, aes(x = group, y = regDensity)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = (ymin + ymax)/2 + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("regDensity_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
plot_regDensity(gene.TSS.tb.plot, "group", ymax = 10)
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj)
temp <- left_join(gene.TSS.tb, diff.RNA, by = c("ensembl" = "ensembl_gene_id")) %>% dplyr::filter(!is.na(log2FoldChange),
!is.na(TAD))
data <- temp %>% group_by(TAD) %>%
dplyr::summarise(absAvgLog2FC = abs(mean(log2FoldChange)),
avgLog2FC = mean(log2FoldChange),
geneDensity = mean(geneDensity),
enhDensity = mean(enhDensity),
regDensity = mean(regDensity))
data$geneDensityGroup <- cut(
data$geneDensity,
breaks = quantile(data$geneDensity, probs = seq(0, 1, 0.2), na.rm = TRUE),
include.lowest = TRUE,
labels = paste0(seq(0, 80, 20), "-", seq(20, 100, 20), "%")
)
data$enhDensityGroup <- cut(
data$enhDensity,
breaks = quantile(data$enhDensity, probs = seq(0, 1, 0.2), na.rm = TRUE),
include.lowest = TRUE,
labels = paste0(seq(0, 80, 20), "-", seq(20, 100, 20), "%")
)
### Ploting grouping
p <- ggplot(data, aes(x = enhDensityGroup, y = enhDensity, fill = enhDensityGroup)) +
geom_boxplot(color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA) + theme_classic() +
coord_cartesian(ylim = c(0, quantile(data$enhDensity, 0.99))) +
labs(x = "Enhancer Density Group" , y = "TAD enhancer density") +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
theme(
legend.position = "none",
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("tadGroup_enhDensity")
width <- panelSize(1.2)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
p <- ggplot(data, aes(x = geneDensityGroup, y = geneDensity, fill = geneDensityGroup)) +
geom_boxplot(color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA) + theme_classic() +
coord_cartesian(ylim = c(0, quantile(data$geneDensity, 0.99))) +
labs(x = "Gene Density Group" , y = "Gene density") +
scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
theme(
legend.position = "none",
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("tadGroup_geneDensity")
width <- 31*mmToInch
height <- 38*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### Plotting distribution - enh
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(enhDensityGroup ==group1) )$avgLog2FC
distance2 <- (data %>% dplyr::filter(enhDensityGroup ==group2) )$avgLog2FC
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
#### ALL
# ylim <- max(abs(quantile(data$avgLog2FC, 0.01)), abs(quantile(data$avgLog2FC, 0.99)))
# p <- ggplot(data, aes(x = enhDensityGroup, y = avgLog2FC)) + geom_violin(aes(fill = enhDensityGroup))+
# geom_boxplot(outlier.shape = NA, width = 0.1) + theme_classic() +
# coord_cartesian(ylim = c(-ylim, ylim)) +
# geom_hline(yintercept = 0) + theme(legend.position = "none") +
# scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
#
# fileName <- paste0("tadGroup_enhDensity_avgLog2FC")
# height <- 3
# width <- 3
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
# print(p)
# dev.off()
#### UP
data.plus <- data %>% dplyr::filter(avgLog2FC > 0)
ps01 <- round(getPvalWilcox(data.plus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.plus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.plus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.plus, "60-80%", "80-100%"), 5)
p <- ggplot(data.plus, aes(x = enhDensityGroup, y = avgLog2FC)) +
geom_violin(aes(fill = enhDensityGroup),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE)+
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
labs(x = "Enhancer density group" , y = "avg log2(fold change)") +
stat_summary(
aes(group = enhDensityGroup), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+ coord_cartesian(ylim = c(0, 1.5)) +
annotate("text", x = 1, y = 1, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n"),
color = "black", hjust = 0, size = 2)
fileName <- paste0("tadGroup_enhDensity_avgLog2FC_up")
width <- panelSize(1.15)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#### DOWN
data.minus <- data %>% dplyr::filter(avgLog2FC < 0)
ps01 <- round(getPvalWilcox(data.minus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.minus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.minus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.minus, "60-80%", "80-100%"), 5)
p <- ggplot(data.minus, aes(x = enhDensityGroup, y = avgLog2FC)) +
geom_violin(aes(fill = enhDensityGroup),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE)+
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
labs(x = "Enhancer density group" , y = "avg log2(fold change)") +
stat_summary(
aes(group = enhDensityGroup), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+ coord_cartesian(ylim = c(-1.5, 0))+
annotate("text", x = 1, y = -1, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n"),
color = "black", hjust = 0, size = 2)
fileName <- paste0("tadGroup_enhDensity_avgLog2FC_down")
width <- panelSize(1.175)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### Plotting distribution - gene
#### ALL
# ylim <- max(abs(quantile(data$avgLog2FC, 0.01)), abs(quantile(data$avgLog2FC, 0.99)))
# p <- ggplot(data, aes(x = geneDensityGroup, y = avgLog2FC)) + geom_violin(aes(fill = geneDensityGroup))+
# geom_boxplot(outlier.shape = NA, width = 0.1) + theme_classic() +
# coord_cartesian(ylim = c(-ylim, ylim)) +
# geom_hline(yintercept = 0) + theme(legend.position = "none") +
# scale_fill_manual(values = c("#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252"))
#
# fileName <- paste0("tadGroup_geneDensity_avgLog2FC")
# height <- 3
# width <- 3
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
# print(p)
# dev.off()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(geneDensityGroup ==group1) )$avgLog2FC
distance2 <- (data %>% dplyr::filter(geneDensityGroup ==group2) )$avgLog2FC
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
data.plus <- data %>% dplyr::filter(avgLog2FC > 0)
ps01 <- round(getPvalWilcox(data.plus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.plus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.plus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.plus, "60-80%", "80-100%"), 5)
p <- ggplot(data.plus, aes(x = geneDensityGroup, y = avgLog2FC)) +
geom_violin(aes(fill = geneDensityGroup),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE)+
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
labs(x = "Gene density group" , y = "avg log2(fold change)") +
stat_summary(
aes(group = geneDensityGroup), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+ coord_cartesian(ylim = c(0, 1.5)) +
annotate("text", x = 1, y = 1, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n"),
color = "black", hjust = 0, size = 2)
fileName <- paste0("tadGroup_geneDensity_avgLog2FC_up")
width <- panelSize(1.15)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#### DOWN
data.minus <- data %>% dplyr::filter(avgLog2FC < 0)
ps01 <- round(getPvalWilcox(data.minus, "0-20%", "20-40%"), 5)
ps12 <- round(getPvalWilcox(data.minus, "20-40%", "40-60%"), 5)
ps23 <- round(getPvalWilcox(data.minus, "40-60%", "60-80%"), 5)
ps34 <- round(getPvalWilcox(data.minus, "60-80%", "80-100%"), 5)
p <- ggplot(data.minus, aes(x = geneDensityGroup, y = avgLog2FC)) +
geom_violin(aes(fill = geneDensityGroup),
color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE)+
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
labs(x = "Gene density group" , y = "avg log2(fold change)") +
stat_summary(
aes(group = geneDensityGroup), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
scale_fill_manual(values = c("#D9D9D9", "#BFBFBF", "#A6A6A6", "#8C8C8C", "#737373")) +
theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+ coord_cartesian(ylim = c(-1.5, 0))+
annotate("text", x = 1, y = -1, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n"),
color = "black", hjust = 0, size = 2)
fileName <- paste0("tadGroup_geneDensity_avgLog2FC_down")
width <- panelSize(1.175)*mmToInch
height <- panelSize(1.07)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### CDF plot
ks_result <- ks.test(
data %>% dplyr::filter(geneDensityGroup == "0-20%") %>% pull(absAvgLog2FC),
data %>% dplyr::filter(geneDensityGroup == "20-40%") %>% pull(absAvgLog2FC)
)
p <- ggplot(data, aes(x = absAvgLog2FC, color = geneDensityGroup)) +
scale_color_manual(values = rev(c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
labs(
x = "Abs. log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.position = "none",
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS),
) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("tadGroup_geneDensity_avgLog2FC_cdf")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
data <- data.minus
group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "0-20%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "20-40%"]
ks.test(group1_data, group2_data)
group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "20-40%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "40-60%"]
ks.test(group1_data, group2_data)
group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "40-60%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "60-80%"]
ks.test(group1_data, group2_data)
group1_data <- data$absAvgLog2FC[data$geneDensityGroup == "60-80%"]
group2_data <- data$absAvgLog2FC[data$geneDensityGroup == "80-100%"]
ks.test(group1_data, group2_data)
Checking how insulation score changes by treatment at DMSO TAD boundaries
name <- "chromo_cons_annoHierarchy"
consensus.loop.anno.tb <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import insulation score calculated with python
resultDir <- here("../../result")
insScore.DMSO <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1DMSO.tsv")) %>%
dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.DMSO) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.dTAG <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1dTAG.tsv")) %>%
dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.dTAG) <- c("chrom", "start", "end", "insulationScore", "binID")
insScore.A485 <- fread(here(resultDir, "TAD", "insulationScore_25kb_G1A485.tsv")) %>%
dplyr::select(c("chrom", "start", "end", "log2_insulation_score_125000")) %>%
dplyr::mutate(binID = paste(chrom, start, end, sep = "_"))
colnames(insScore.A485) <- c("chrom", "start", "end", "insulationScore", "binID")
getInsulationScore <- function(chr, coordinate, insScore.tb){
temp.tb <- insScore.tb %>% dplyr::filter(chrom == chr, start < coordinate, end > coordinate)
out <- temp.tb$insulationScore
if(length(out) < 1){
return(NA)
}else{
return(temp.tb$insulationScore)
}
}
# Importing TAD boundaries
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% rowwise() %>%
dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
center = (start + end)/2,
insScore_DMSO = getInsulationScore(chr, center, insScore.DMSO),
insScore_dTAG = getInsulationScore(chr, center, insScore.dTAG),
insScore_A485 = getInsulationScore(chr, center, insScore.A485))
tad_boundary <- tad_boundary %>% filter(!if_any(everything(), is.na))
tad_boundary$density <- get_density(tad_boundary$insScore_DMSO, tad_boundary$insScore_dTAG, n = 100)
tad_boundary <- tad_boundary %>% dplyr::arrange(density)
ggplot(tad_boundary, aes(x = insScore_DMSO, y = insScore_dTAG, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() + coord_fixed() +
geom_abline(slope = 1, intercept = 0) + theme_bw()
tad_boundary$density <- get_density(tad_boundary$insScore_DMSO, tad_boundary$insScore_A485, n = 100)
tad_boundary <- tad_boundary %>% dplyr::arrange(density)
ggplot(tad_boundary, aes(x = insScore_DMSO, y = insScore_A485, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() + coord_fixed() +
geom_abline(slope = 1, intercept = 0) + theme_bw()
Previously, I tried to check the percentage of loops crossing the boundary per group which wasn’t fruitful. This time, try to do this on the differential loops. #### dTAG
### Importing differential regulatory loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(group = "UP/NO")
colnames(loop.up) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(group = "UP/NO")
colnames(loop.no) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(group = "DOWN")
colnames(loop.down) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loops <- bind_rows(bind_rows(loop.up, loop.no), loop.down)
### Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
start = start -175000/2,
end = end + 175000/2,
center = (start + end)/2)
### Functions
checkBoundaryCross <- function(chrom1, start1, end2, tad_boundary){
temp <- tad_boundary %>% dplyr::filter(chr == chrom1,
center > start1,
center < end2)
return(length(temp$center))
}
### Check overlap
loops <- loops %>% rowwise() %>%
dplyr::mutate(boundaryCross = checkBoundaryCross(chr1, start1, end2, tad_boundary))
### Plotting
summary_data <- loops %>%
group_by(group) %>%
summarise(percentage = mean(boundaryCross > 0) * 100)
summary_data$group <- factor(summary_data$group, levels = c("UP/NO", "DOWN"))
p <- ggplot(summary_data, aes(x = group, y = percentage)) +
geom_bar(stat = "identity", fill = darken(strong_green, amount = 0.2)) +
labs(y = "% of loops crossing TAD boundary", x = NULL) +
theme_classic() + ylim(0, 100) +
theme(
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
)
fileName <- paste0("boundaryCrossRatio")
width <- panelSize(1)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### Importing differential regulatory loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(group = "up")
colnames(loop.up) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(group = "no")
colnames(loop.no) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(group = "down")
colnames(loop.down) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", "group")
loops <- bind_rows(bind_rows(loop.up, loop.no), loop.down)
### Importing TAD boundary
tad_boundary <- fread(here("../../result/TAD", "TAD_25kb_125kb_otsu_boundaries_G1DMSO.bed"))
colnames(tad_boundary) <- c("chr", "start", "end")
tad_boundary <- tad_boundary %>% dplyr::mutate(tad_id = paste(chr, start, end, sep = "_"),
start = start -175000/2,
end = end + 175000/2,
center = (start + end)/2)
### Functions
checkBoundaryCross <- function(chrom1, start1, end2, tad_boundary){
temp <- tad_boundary %>% dplyr::filter(chr == chrom1,
center > start1,
center < end2)
return(length(temp$center))
}
### Check overlap
loops <- loops %>% rowwise() %>%
dplyr::mutate(boundaryCross = checkBoundaryCross(chr1, start1, end2, tad_boundary))
### Plotting
summary_data <- loops %>%
group_by(group) %>%
summarise(percentage = mean(boundaryCross > 0) * 100)
p <- ggplot(summary_data, aes(x = group, y = percentage)) +
geom_bar(stat = "identity", fill = "skyblue") +
labs(title = "Percentage of Rows with boundaryCross > 0 per Group",
x = "Group",
y = "Percentage") +
theme_classic() + ylim(0, 100)
fileName <- paste0("boundaryCrossRatio_A485")
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
# LOADING LOLA DB
library("simpleCache")
library("LOLA")
lolaDB = loadRegionDB("/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/reference/LOLACore_cached/mm10")
# FUNCTIONS
extractAnchor <- function(loop){
anchor1 <- loop %>% dplyr::select(c(1, 2, 3))
colnames(anchor1) <- c("chr", "start", "end")
anchor2 <- loop %>% dplyr::select(c(4, 5, 6))
colnames(anchor2) <- c("chr", "start", "end")
anchors <- reduce(makeGRangesFromDataFrame(bind_rows(anchor1, anchor2)))
return(anchors)
}
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.1 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothRetained.bedpe"))
anchor.1 <- (extractAnchor(loop.1))
overlaps <- findOverlaps(anchor.1, atac.gr)
anchor.1 <- pintersect(anchor.1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.2 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_AsyncSpecificPert.bedpe"))
anchor.2 <- (extractAnchor(loop.2))
overlaps <- findOverlaps(anchor.2, atac.gr)
anchor.2 <- pintersect(anchor.2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.3 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_G1SpecificPert.bedpe"))
anchor.3 <- (extractAnchor(loop.3))
overlaps <- findOverlaps(anchor.3, atac.gr)
anchor.3 <- pintersect(anchor.3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.4 <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_G1vsAsync_bothPert.bedpe"))
anchor.4 <- (extractAnchor(loop.4))
overlaps <- findOverlaps(anchor.4, atac.gr)
anchor.4 <- pintersect(anchor.4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchors <- list(anchor.1, anchor.2, anchor.3, anchor.4)
tbs <- list()
temps <- list()
# Process clusters c1 to c8
for (i in 1:4) {
anchor <- anchors[[i]]
# Run LOLA
result <- runLOLA(anchor, anchor.all, lolaDB)
tb <- as_tibble(result)
# Filter and summarize
tb <- tb %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
# Store tb
tbs[[i]] <- tb
# Select and rename oddsRatio
temp <- tb %>% dplyr::select(target, oddsRatio)
colnames(temp) <- c("target", paste0("OR_c", i))
# Store temp
temps[[i]] <- temp
}
# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
mutate_all(~replace_na(., 1))
colnames(temp) <- c("target", "bothRetained", "AsyncSpecificPerturb", "G1SpecificPerturb", "bothPerturb")
data <- temp %>% column_to_rownames("target") %>% as.matrix()
library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
overlaps <- findOverlaps(anchor.upno, atac.gr)
anchor.upno <- pintersect(anchor.upno[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)
# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv"), sep = "\t")
# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv"), sep = "\t")
# UPNO
result = runLOLA(anchor.upno, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac.tsv"), sep = "\t")
# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv"), sep = "\t")
### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")
temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))
data <- as.matrix(temp[2:4])
rownames(data) <- temp$target
library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
# p <- Heatmap(
# data,
# name = "Odds Ratio", # Name of the heatmap legend
# cluster_columns = FALSE, # Remove column dendrogram
# row_km = 4, # Define the number of k-means clusters for rows (adjust as needed)
# show_row_dend = FALSE,
# col = col_fun # Use the red gradient color scale
# )
#
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_regAnchorBackground_atac")
# height <- 7
# width <- 3.5
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
# print(p)
# dev.off()
#
### Visualizing p-value and OR
alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
# dplyr::mutate(target = toupper(antibody),
# qValueLog = -log2(qValue),
# group = "UP") %>%
# filter(str_to_lower(cellType) == "embryonic stem cell") %>%
# dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
# slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
# dplyr::mutate(target = toupper(antibody),
# qValueLog = -log2(qValue),
# group = "NO") %>%
# filter(str_to_lower(cellType) == "embryonic stem cell") %>%
# dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
# slice_min(meanRnk, with_ties = FALSE)
tb.upno <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log10(qValue),
group = "UP/NO") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.upno %>% dplyr::select(c(24, 16, 20,
4, 25, 5,
7, 8, 9, 10, 11,
12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_atac_pub.tsv"), sep = "\t")
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log10(qValue),
group = "DOWN") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.down %>% dplyr::select(c(24, 16, 20,
4, 25, 5,
7, 8, 9, 10, 11,
12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac_pub.tsv"), sep = "\t")
temp.upno <- tb.upno %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp <- bind_rows(temp.upno, temp.down)
# order <- c((temp %>% dplyr::filter(group == "UP/NO") %>% arrange(desc(oddsRatio)))$target,
# (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
# temp$target <- factor(temp$target, levels = rev(order))
temp$group <- factor(temp$group, levels = c("UP/NO", "DOWN"))
targetList <- c("POLR2A", "CTR9",
"AFF4", "ELL2",
"MED1", "MED12",
"TBP", "TAF1", "TAF3",
"E2F1", "YY1", "NIPBL",
"EP300", "DPY30", "SETDB1",
"RAD21", "SMC1A", "SMC3", "CTCF",
"SUZ12", "PHF19"
)
temp <- temp %>% dplyr::filter(target %in% targetList)
temp$target <- factor(temp$target, levels = rev(targetList))
qValueLogMax <- 50
temp2 <- temp %>% dplyr::mutate(qValueLog = min(qValueLog, qValueLogMax))
p <- ggplot(temp2, aes(x = group, y = target, fill = oddsRatio, size = qValueLog)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM # Line width for the border
) + theme_bw() +
scale_size_continuous(range = c(0.5, 2)) + # Set min and max point sizes here
scale_fill_gradient(low = "white", high = "#CB333A",
limits = c(1, 3),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme(
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
# p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
# geom_point() + theme_bw() +
# scale_size_continuous(range = c(1, 3)) + # Set min and max point sizes here
# scale_color_gradient(low = "blue", high = "red",
# guide = guide_colorbar(
# barwidth = 1.5/5.08, # Adjust width of the color bar
# barheight = 15/5.08 # Adjust height of the color bar
# )) +
# labs(x = NULL, y = NULL) +
# theme(
# axis.title = element_text(
# size = fontSizeS,
# family = fontType,
# color = "#000000"
# ),
# axis.text = element_text(
# size = fontSizeS,
# family = fontType,
# color = "#000000"
# ),
# axis.text.x = element_text(
# angle = 45, # Rotate x-axis labels 45 degrees
# hjust = 1, # Adjust horizontal justification
# vjust = 1 # Adjust vertical justification
# ),
# axis.line = element_line(
# color = "#000000",
# size = lineThick*mmToLineUnit,
# lineend = "square"
# ),
# axis.ticks = element_line(
# color = "#000000",
# size = lineThick*mmToLineUnit,
# lineend = "square"
# ),
# panel.background = element_rect(fill = "transparent"),
# legend.text = element_text(family = fontType, size = fontSizeS),
# legend.title = element_text(family = fontType, size = fontSizeS)
# )
fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac_upno")
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.9)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### 241015 Testing on subset of loops
anchor.c1 <- extractAnchor(loop.cluster1)
overlaps <- findOverlaps(anchor.c1, atac.gr)
anchor.c1 <- pintersect(anchor.c1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c2 <- extractAnchor(loop.cluster2)
overlaps <- findOverlaps(anchor.c2, atac.gr)
anchor.c2 <- pintersect(anchor.c2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c3 <- extractAnchor(loop.cluster3)
overlaps <- findOverlaps(anchor.c3, atac.gr)
anchor.c3 <- pintersect(anchor.c3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c4 <- extractAnchor(loop.cluster4)
overlaps <- findOverlaps(anchor.c4, atac.gr)
anchor.c4 <- pintersect(anchor.c4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c5 <- extractAnchor(loop.cluster5)
overlaps <- findOverlaps(anchor.c5, atac.gr)
anchor.c5 <- pintersect(anchor.c5[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c6 <- extractAnchor(loop.cluster6)
overlaps <- findOverlaps(anchor.c6, atac.gr)
anchor.c6 <- pintersect(anchor.c6[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c7 <- extractAnchor(loop.cluster7)
overlaps <- findOverlaps(anchor.c7, atac.gr)
anchor.c7 <- pintersect(anchor.c7[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c8 <- extractAnchor(loop.cluster8)
overlaps <- findOverlaps(anchor.c8, atac.gr)
anchor.c8 <- pintersect(anchor.c8[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
result = runLOLA(anchor.c1, anchor.all, lolaDB)
tb.c1 = as_tibble(result)
result = runLOLA(anchor.c2, anchor.all, lolaDB)
tb.c2 = as_tibble(result)
result = runLOLA(anchor.c3, anchor.all, lolaDB)
tb.c3 = as_tibble(result)
result = runLOLA(anchor.c4, anchor.all, lolaDB)
tb.c4 = as_tibble(result)
result = runLOLA(anchor.c5, anchor.all, lolaDB)
tb.c5 = as_tibble(result)
result = runLOLA(anchor.c6, anchor.all, lolaDB)
tb.c6 = as_tibble(result)
result = runLOLA(anchor.c7, anchor.all, lolaDB)
tb.c7 = as_tibble(result)
result = runLOLA(anchor.c8, anchor.all, lolaDB)
tb.c8 = as_tibble(result)
### HEATMAP
alpha <- 0.05
tb.c1 <- tb.c1 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c2 <- tb.c2 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c3 <- tb.c3 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c4 <- tb.c4 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c5 <- tb.c5 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c6 <- tb.c6 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c7 <- tb.c7 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c8 <- tb.c8 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.c1 <- tb.c1 %>% dplyr::select(target, oddsRatio)
colnames(temp.c1) <- c("target", "OR_c1")
temp.c2 <- tb.c2 %>% dplyr::select(target, oddsRatio)
colnames(temp.c2) <- c("target", "OR_c2")
temp.c3 <- tb.c3 %>% dplyr::select(target, oddsRatio)
colnames(temp.c3) <- c("target", "OR_c3")
temp.c4 <- tb.c4 %>% dplyr::select(target, oddsRatio)
colnames(temp.c4) <- c("target", "OR_c4")
temp.c5 <- tb.c5 %>% dplyr::select(target, oddsRatio)
colnames(temp.c5) <- c("target", "OR_c5")
temp.c6 <- tb.c6 %>% dplyr::select(target, oddsRatio)
colnames(temp.c6) <- c("target", "OR_c6")
temp.c7 <- tb.c7 %>% dplyr::select(target, oddsRatio)
colnames(temp.c7) <- c("target", "OR_c7")
temp.c8 <- tb.c8 %>% dplyr::select(target, oddsRatio)
colnames(temp.c8) <- c("target", "OR_c8")
temp <- full_join(temp.c1, temp.c2, by = "target") %>%
full_join(temp.c3, by = "target") %>%
full_join(temp.c4, by = "target") %>%
full_join(temp.c5, by = "target") %>%
full_join(temp.c6, by = "target") %>%
full_join(temp.c7, by = "target") %>%
full_join(temp.c8, by = "target") %>%
mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()
library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
# Initialize lists to store results
anchors <- list()
tbs <- list()
temps <- list()
# Process clusters c1 to c8
for (i in 1:4) {
# Extract anchor
loop_cluster <- get(paste0("loop.cluster", i))
anchor <- extractAnchor(loop_cluster)
# Find overlaps and intersect
overlaps <- findOverlaps(anchor, atac.gr)
anchor <- pintersect(anchor[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# Store anchor
anchors[[i]] <- anchor
# Run LOLA
result <- runLOLA(anchor, anchor.all, lolaDB)
tb <- as_tibble(result)
# Filter and summarize
tb <- tb %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
# Store tb
tbs[[i]] <- tb
# Select and rename oddsRatio
temp <- tb %>% dplyr::select(target, oddsRatio)
colnames(temp) <- c("target", paste0("OR_c", i))
# Store temp
temps[[i]] <- temp
}
# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()
library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
overlaps <- findOverlaps(anchor.upno, atac.gr)
anchor.upno <- pintersect(anchor.upno[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loopNum <- nrow(loop.upno)
# Selecting same number of extreme down loops
loop.down <- loop.all %>% dplyr::filter(diff_dTAG_DMSO < -0.2) %>% dplyr::arrange(diff_dTAG_DMSO) %>% slice_head(n = loopNum)
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)
# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv"), sep = "\t")
# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv"), sep = "\t")
# UPNO
result = runLOLA(anchor.upno, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac.tsv"), sep = "\t")
# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
#fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv"), sep = "\t")
### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")
temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))
data <- as.matrix(temp[2:4])
rownames(data) <- temp$target
library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
# p <- Heatmap(
# data,
# name = "Odds Ratio", # Name of the heatmap legend
# cluster_columns = FALSE, # Remove column dendrogram
# row_km = 10, # Define the number of k-means clusters for rows (adjust as needed)
# show_row_dend = FALSE,
# col = col_fun,
# border = TRUE
# )
#
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_allAnchorBackground_atac")
# height <- 7
# width <- 3.5
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
# print(p)
# dev.off()
### Visualizing p-value and OR
alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
# dplyr::mutate(target = toupper(antibody),
# qValueLog = -log2(qValue),
# group = "UP") %>%
# filter(str_to_lower(cellType) == "embryonic stem cell") %>%
# dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
# slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
# dplyr::mutate(target = toupper(antibody),
# qValueLog = -log2(qValue),
# group = "NO") %>%
# filter(str_to_lower(cellType) == "embryonic stem cell") %>%
# dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
# slice_min(meanRnk, with_ties = FALSE)
tb.upno <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log10(qValue),
group = "UP/NO") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.upno %>% dplyr::select(c(24, 16, 20,
4, 25, 5,
7, 8, 9, 10, 11,
12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_upno_allLoops_extreme_atac_pub.tsv"), sep = "\t")
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log10(qValue),
group = "DOWN") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
fwrite(tb.down %>% dplyr::select(c(24, 16, 20,
4, 25, 5,
7, 8, 9, 10, 11,
12, 13, 14)), here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_allLoops_extreme_atac_pub.tsv"), sep = "\t")
# temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.upno <- tb.upno %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp <- bind_rows(temp.upno, temp.down)
#
# order <- c((temp %>% dplyr::filter(group == "UP/NO") %>% arrange(desc(oddsRatio)))$target,
# (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
# temp$target <- factor(temp$target, levels = rev(order))
temp$group <- factor(temp$group, levels = c("UP/NO", "DOWN"))
targetList <- c("POLR2A", "CTR9",
"AFF4", "ELL2",
"MED1", "MED12",
"TBP", "TAF1", "TAF3",
"ESRRB", "KLF4", "NANOG", "POU5F1", "SOX2", "STAT3", "E2F1", "YY1",
"EP300", "DPY30", "EZH2", "KDM2B", "KDB4B", "KDM4C", "KDM6B", "RBBP5",
"RAD21", "SMC1A", "SMC3", "CTCF",
"JARID2", "SUZ12"
)
temp <- temp %>% dplyr::filter(target %in% targetList)
temp$target <- factor(temp$target, levels = rev(targetList))
# MAX qValueLog to 50
qValueLogMax <- 50
temp2 <- temp %>% dplyr::mutate(qValueLog = min(qValueLog, qValueLogMax))
p <- ggplot(temp2, aes(x = group, y = target, fill = oddsRatio, size = qValueLog)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 0.5*ptToMM # Line width for the border
) + theme_bw() +
scale_size_continuous(range = c(0.5, 2)) + # Set min and max point sizes here
scale_fill_gradient(low = "white", high = "#CB333A",
limits = c(1, 3),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme(
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_OR_qValue_allAnchorBackground_atac_ordered")
width <- panelSize(1.5)*mmToInch
height <- panelSize(2.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_UP_diff0.5.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_NO_diff0.5.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_all_dTAGvsDMSO_DOWN_diff0.5.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)
# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv"), sep = "\t")
# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv"), sep = "\t")
# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv"), sep = "\t")
### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")
temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))
data <- as.matrix(temp[2:4])
rownames(data) <- temp$target
library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
row_km = 4, # Define the number of k-means clusters for rows (adjust as needed)
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_allLoops_allAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
############
#
# ### Visualizing p-value and OR
# alpha <- 0.05
# tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_allLoops_atac.tsv")) %>%
# dplyr::mutate(target = toupper(antibody),
# qValueLog = -log2(qValue),
# group = "UP") %>%
# filter(str_to_lower(cellType) == "embryonic stem cell") %>%
# dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
# slice_min(meanRnk, with_ties = FALSE)
# tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_allLoops_atac.tsv")) %>%
# dplyr::mutate(target = toupper(antibody),
# qValueLog = -log2(qValue),
# group = "NO") %>%
# filter(str_to_lower(cellType) == "embryonic stem cell") %>%
# dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
# slice_min(meanRnk, with_ties = FALSE)
# tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_allLoops_atac.tsv")) %>%
# dplyr::mutate(target = toupper(antibody),
# qValueLog = -log2(qValue),
# group = "DOWN") %>%
# filter(str_to_lower(cellType) == "embryonic stem cell") %>%
# dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
# slice_min(meanRnk, with_ties = FALSE)
#
# temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
# temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
#
# temp <- bind_rows(temp.up, temp.down)
#
# order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target,
# (temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
# temp$target <- factor(temp$target, levels = rev(order))
# p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
# geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
# labs(x = NULL, y = NULL) + scale_size_continuous(range = c(1, 3)) +
# theme(axis.text = element_text(size = 6), # Set axis text size
# axis.title = element_text(size = 6), # Set axis title size (if not removed)
# legend.text = element_text(size = 6), # Set legend text size
# legend.title = element_text(size = 6))
#
# fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_allLoops_OR_allAnchorBackground_atac")
# height <-3
# width <- 2
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
# svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
# print(p)
# dev.off()
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_UP_diff0.5.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_NO_diff0.5.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_logOE_pe-pe_dTAGvsDMSO_DOWN_diff0.5.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)
# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_pe-peLoops_atac.tsv"), sep = "\t")
# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_pe-peLoops_atac.tsv"), sep = "\t")
# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_pe-peLoops_atac.tsv"), sep = "\t")
### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_up_pe-peLoops_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_no_pe-peLoops_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_logOE_diff0.5_down_pe-peLoops_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")
temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))
data <- as.matrix(temp[2:4])
rownames(data) <- temp$target
library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
row_km = 4, # Define the number of k-means clusters for rows (adjust as needed)
show_row_dend = FALSE,
col = col_fun # Use the red gradient color scale
)
fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_logOE_diff0.5_pe-peLoops_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_up_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "UP") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_no_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "NO") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_dTAG_vs_DMSO_diff0.2_down_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "DOWN") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp <- bind_rows(temp.up, temp.down)
order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target,
(temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
labs(x = NULL, y = NULL) +
theme(axis.text = element_text(size = 6), # Set axis text size
axis.title = element_text(size = 6), # Set axis title size (if not removed)
legend.text = element_text(size = 6), # Set legend text size
legend.title = element_text(size = 6))
fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### 241015 Testing on subset of loops
anchor.c1 <- extractAnchor(loop.cluster1)
overlaps <- findOverlaps(anchor.c1, atac.gr)
anchor.c1 <- pintersect(anchor.c1[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c2 <- extractAnchor(loop.cluster2)
overlaps <- findOverlaps(anchor.c2, atac.gr)
anchor.c2 <- pintersect(anchor.c2[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c3 <- extractAnchor(loop.cluster3)
overlaps <- findOverlaps(anchor.c3, atac.gr)
anchor.c3 <- pintersect(anchor.c3[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c4 <- extractAnchor(loop.cluster4)
overlaps <- findOverlaps(anchor.c4, atac.gr)
anchor.c4 <- pintersect(anchor.c4[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c5 <- extractAnchor(loop.cluster5)
overlaps <- findOverlaps(anchor.c5, atac.gr)
anchor.c5 <- pintersect(anchor.c5[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c6 <- extractAnchor(loop.cluster6)
overlaps <- findOverlaps(anchor.c6, atac.gr)
anchor.c6 <- pintersect(anchor.c6[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c7 <- extractAnchor(loop.cluster7)
overlaps <- findOverlaps(anchor.c7, atac.gr)
anchor.c7 <- pintersect(anchor.c7[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchor.c8 <- extractAnchor(loop.cluster8)
overlaps <- findOverlaps(anchor.c8, atac.gr)
anchor.c8 <- pintersect(anchor.c8[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
result = runLOLA(anchor.c1, anchor.all, lolaDB)
tb.c1 = as_tibble(result)
result = runLOLA(anchor.c2, anchor.all, lolaDB)
tb.c2 = as_tibble(result)
result = runLOLA(anchor.c3, anchor.all, lolaDB)
tb.c3 = as_tibble(result)
result = runLOLA(anchor.c4, anchor.all, lolaDB)
tb.c4 = as_tibble(result)
result = runLOLA(anchor.c5, anchor.all, lolaDB)
tb.c5 = as_tibble(result)
result = runLOLA(anchor.c6, anchor.all, lolaDB)
tb.c6 = as_tibble(result)
result = runLOLA(anchor.c7, anchor.all, lolaDB)
tb.c7 = as_tibble(result)
result = runLOLA(anchor.c8, anchor.all, lolaDB)
tb.c8 = as_tibble(result)
### HEATMAP
alpha <- 0.05
tb.c1 <- tb.c1 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c2 <- tb.c2 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c3 <- tb.c3 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c4 <- tb.c4 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c5 <- tb.c5 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c6 <- tb.c6 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c7 <- tb.c7 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.c8 <- tb.c8 %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.c1 <- tb.c1 %>% dplyr::select(target, oddsRatio)
colnames(temp.c1) <- c("target", "OR_c1")
temp.c2 <- tb.c2 %>% dplyr::select(target, oddsRatio)
colnames(temp.c2) <- c("target", "OR_c2")
temp.c3 <- tb.c3 %>% dplyr::select(target, oddsRatio)
colnames(temp.c3) <- c("target", "OR_c3")
temp.c4 <- tb.c4 %>% dplyr::select(target, oddsRatio)
colnames(temp.c4) <- c("target", "OR_c4")
temp.c5 <- tb.c5 %>% dplyr::select(target, oddsRatio)
colnames(temp.c5) <- c("target", "OR_c5")
temp.c6 <- tb.c6 %>% dplyr::select(target, oddsRatio)
colnames(temp.c6) <- c("target", "OR_c6")
temp.c7 <- tb.c7 %>% dplyr::select(target, oddsRatio)
colnames(temp.c7) <- c("target", "OR_c7")
temp.c8 <- tb.c8 %>% dplyr::select(target, oddsRatio)
colnames(temp.c8) <- c("target", "OR_c8")
temp <- full_join(temp.c1, temp.c2, by = "target") %>%
full_join(temp.c3, by = "target") %>%
full_join(temp.c4, by = "target") %>%
full_join(temp.c5, by = "target") %>%
full_join(temp.c6, by = "target") %>%
full_join(temp.c7, by = "target") %>%
full_join(temp.c8, by = "target") %>%
mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()
library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
# Initialize lists to store results
anchors <- list()
tbs <- list()
temps <- list()
# Process clusters c1 to c8
for (i in 1:4) {
# Extract anchor
loop_cluster <- get(paste0("loop.cluster", i))
anchor <- extractAnchor(loop_cluster)
# Find overlaps and intersect
overlaps <- findOverlaps(anchor, atac.gr)
anchor <- pintersect(anchor[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# Store anchor
anchors[[i]] <- anchor
# Run LOLA
result <- runLOLA(anchor, anchor.all, lolaDB)
tb <- as_tibble(result)
# Filter and summarize
tb <- tb %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
# Store tb
tbs[[i]] <- tb
# Select and rename oddsRatio
temp <- tb %>% dplyr::select(target, oddsRatio)
colnames(temp) <- c("target", paste0("OR_c", i))
# Store temp
temps[[i]] <- temp
}
# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
mutate_all(~replace_na(., 1))
data <- temp %>% column_to_rownames("target") %>% as.matrix()
library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loopNum <- nrow(loop.down)
# Selecting same number of extreme no loops
loop.no <- loop.all %>%
dplyr::mutate(absDiff = abs(diff_A485_DMSO)) %>%
dplyr::filter(absDiff < 0.2) %>%
dplyr::arrange(absDiff) %>% slice_head(n = loopNum)
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)
# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv"), sep = "\t")
# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv"), sep = "\t")
# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv"), sep = "\t")
### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")
temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))
data <- as.matrix(temp[2:4])
rownames(data) <- temp$target
library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
row_km = 5, # Define the number of k-means clusters for rows (adjust as needed)
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_allLoops_extreme_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "UP") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "NO") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_allLoops_extreme_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "DOWN") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp <- bind_rows(temp.up, temp.down)
order <- unique(c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target,
(temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target))
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
labs(x = NULL, y = NULL) + scale_size_continuous(range = c(1, 3)) +
theme(axis.text = element_text(size = 6), # Set axis text size
axis.title = element_text(size = 6), # Set axis title size (if not removed)
legend.text = element_text(size = 6), # Set legend text size
legend.title = element_text(size = 6))
fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_allLoops_extreme_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
### Importing differential regulatory loops & extract anchor
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
overlaps <- findOverlaps(anchor.up, atac.gr)
anchor.up <- pintersect(anchor.up[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
overlaps <- findOverlaps(anchor.no, atac.gr)
anchor.no <- pintersect(anchor.no[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
overlaps <- findOverlaps(anchor.down, atac.gr)
anchor.down <- pintersect(anchor.down[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
# RUNNING LOLA
lolaDir <- here("../../result/lola")
dir.create(lolaDir, showWarnings = FALSE, recursive = TRUE)
# UP
result = runLOLA(anchor.up, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv"), sep = "\t")
# NO
result = runLOLA(anchor.no, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv"), sep = "\t")
# DOWN
result = runLOLA(anchor.down, anchor.all, lolaDB)
tb = as_tibble(result)
fwrite(tb, here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv"), sep = "\t")
### HEATMAP
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio)
colnames(temp.up) <- c("target", "OR_up")
temp.no <- tb.no %>% dplyr::select(target, oddsRatio)
colnames(temp.no) <- c("target", "OR_no")
temp.down <- tb.down %>% dplyr::select(target, oddsRatio)
colnames(temp.down) <- c("target", "OR_down")
temp <- full_join(full_join(temp.up, temp.no, by = c("target")), temp.down, by = c("target")) %>% mutate_all(~replace_na(., 1))
data <- as.matrix(temp[2:4])
rownames(data) <- temp$target
library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
row_km = 4, # Define the number of k-means clusters for rows (adjust as needed)
show_row_dend = FALSE,
col = col_fun # Use the red gradient color scale
)
fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### Visualizing p-value and OR
alpha <- 0.05
tb.up <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_up_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "UP") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.no <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_no_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "NO") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
tb.down <- fread(here(lolaDir, "LOLA_A485_vs_DMSO_diff0.2_down_atac.tsv")) %>%
dplyr::mutate(target = toupper(antibody),
qValueLog = -log2(qValue),
group = "DOWN") %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>% dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
temp.up <- tb.up %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.no <- tb.no %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp.down <- tb.down %>% dplyr::select(target, oddsRatio, qValueLog, group)
temp <- bind_rows(temp.up, temp.down)
order <- c((temp %>% dplyr::filter(group == "UP") %>% arrange(desc(oddsRatio)))$target,
(temp %>% dplyr::filter(group == "DOWN") %>% arrange(desc(oddsRatio)))$target)
temp$target <- factor(temp$target, levels = rev(order))
p <- ggplot(temp, aes(x = group, y = target, color = qValueLog, size = oddsRatio)) +
geom_point() + theme_bw() + scale_color_gradient(low = "blue", high = "red") +
labs(x = NULL, y = NULL) +
theme(axis.text = element_text(size = 6), # Set axis text size
axis.title = element_text(size = 6), # Set axis title size (if not removed)
legend.text = element_text(size = 6), # Set legend text size
legend.title = element_text(size = 6))
fileName <- paste0("anchorLOLA_A485_vs_DMSO_diff0.2_OR_qValue_regAnchorBackground_atac")
height <-3
width <- 2
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
The aim is to see if there is a trend in RNA expression level among group 1, 2, 3, 4. It would be making comparison among genes. For this, TPM should be used.
## Importing groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene
## Importing RNA-seq TPM for DMSO
tpm.selected <- fread(here(refDir, "readCount.filtered.TPM.all.tsv")) %>% dplyr::select(1, 3, 4, 5)
colnames(tpm.selected) <- c("ensembl", "rep1", "rep2", "rep3")
tpm.selected <- tpm.selected %>% dplyr::rowwise() %>%
dplyr::mutate(
group = ifelse(ensembl %in% group1, "group1",
ifelse(ensembl %in% group2, "group2",
ifelse(ensembl %in% group5, "group5",
ifelse(ensembl %in% group8, "group8", NA))))) %>%
dplyr::filter(!is.na(group)) %>%
dplyr::mutate(avgTPM = mean(rep1, rep2, rep3))
ggplot(tpm.selected, aes(x = group, y = avgTPM)) +
geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
scale_y_log10() +
theme_classic()
p <- ggplot(temp.tb, aes(x = group, y = score)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + coord_cartesian(ylim = c(ymin, ymax)) +
annotate("text", x = 1, y = ymin + 1, label = paste0("p12: ", convPvalue(p12), "\n",
"p15: ", convPvalue(p15), "\n",
"p18: ", convPvalue(p18), "\n",
"p25: ", convPvalue(p25), "\n",
"p28: ", convPvalue(p28), "\n",
"p58: ",convPvalue( p58), "\n"),
color = "black", hjust = 0, size = 3)
fileName <- paste0("insulation_score_", note)
height <- 3
width <- 3
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
## Importing groups
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group2.tsv"))$gene
group5 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group5.tsv"))$gene
group8 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_group8.tsv"))$gene
## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")
## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
filters = "ensembl_transcript_id",
values = geneCluster$ensembl_transcript,
mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))
# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
cluster %in% clusterName))
return(num)
}
countGeneList <- function(geneCluster, groupName){
n1 <- countGene(geneCluster, groupName, "Early")
n2 <- countGene(geneCluster, groupName, "Middle")
n3 <-countGene(geneCluster, groupName, "Late")
n4 <- countGene(geneCluster, groupName, "Transient")
return(c(n1, n2, n3, n4))
}
group <- c(rep("group1", 4), rep("group2", 4), rep("group3", 4), rep("group4", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 4)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, group1),
countGeneList(geneCluster, group2),
countGeneList(geneCluster, group5),
countGeneList(geneCluster, group8))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
# Statistical analysis
# Similar to the Chi-Square test, Fisher’s Exact Test is used when you have smaller sample sizes or when the expected frequency in any cell of the contingency table is below 5.
temp <- data %>% dplyr::filter(group %in% c("group1", "group2"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
temp <- data %>% dplyr::filter(group %in% c("group1", "group3"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
temp <- data %>% dplyr::filter(group %in% c("group1", "group4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
### P-N
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
pnOver8 <- (temp2 %>% dplyr::filter(total >= 8))$gene
pnOver6 <- (temp2 %>% dplyr::filter(total >= 6, total < 8))$gene
pnOver4 <- (temp2 %>% dplyr::filter(total >= 4, total < 6))$gene
pnOver2 <- (temp2 %>% dplyr::filter(total >= 2, total < 4))$gene
pnOver0 <- (temp2 %>% dplyr::filter(total < 2))$gene
group <- c(rep("pnOver8", 4), rep("pnOver6", 4), rep("pnOver4", 4), rep("pnOver2", 4), rep("pnOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, pnOver8),
countGeneList(geneCluster, pnOver6),
countGeneList(geneCluster, pnOver4),
countGeneList(geneCluster, pnOver2),
countGeneList(geneCluster, pnOver0))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
temp <- data %>% dplyr::filter(group %in% c("pnOver2", "pnOver8"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
### P-S
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
group <- c(rep("psOver4", 4), rep("psOver3", 4), rep("psOver2", 4), rep("psOver1", 4), rep("psOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, psOver4),
countGeneList(geneCluster, psOver3),
countGeneList(geneCluster, psOver2),
countGeneList(geneCluster, psOver1),
countGeneList(geneCluster, psOver0))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
temp <- data %>% dplyr::filter(group %in% c("psOver2", "psOver4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
### P-E
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
group <- c(rep("peOver4", 4), rep("peOver3", 4), rep("peOver2", 4), rep("peOver1", 4), rep("peOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, peOver4),
countGeneList(geneCluster, peOver3),
countGeneList(geneCluster, peOver2),
countGeneList(geneCluster, peOver1),
countGeneList(geneCluster, peOver0))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
### P-P
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
group <- c(rep("ppOver4", 4), rep("ppOver3", 4), rep("ppOver2", 4), rep("ppOver1", 4), rep("ppOver0", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 5)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, ppOver4),
countGeneList(geneCluster, ppOver3),
countGeneList(geneCluster, ppOver2),
countGeneList(geneCluster, ppOver1),
countGeneList(geneCluster, ppOver0))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
## Importing groups
group1 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group2.tsv"))$gene
group3 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group3.tsv"))$gene
group4 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group4.tsv"))$gene
group5 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group5.tsv"))$gene
group6 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group6.tsv"))$gene
group7 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group7.tsv"))$gene
group8 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group8.tsv"))$gene
group9 <- fread(here(refDir, "geneList_A485_vs_DMSO_RNA_loop_group9.tsv"))$gene
## Importing Bobbie gene classification
geneCluster <- fread(here(refDir, "bobbie_gene_classification.csv")) %>%
dplyr::select(gene, enst, Cluster)
colnames(geneCluster) <- c("gene", "ensembl_transcript_id", "cluster")
## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
filters = "ensembl_transcript_id",
values = geneCluster$ensembl_transcript,
mart = ensembl.v102)
geneCluster <- geneCluster %>% dplyr::left_join(idPair_tg, by = c("ensembl_transcript_id"))
# Making data for stacked barplot
countGene <- function(geneCluster, groupName, clusterName){
num <- nrow(geneCluster %>% dplyr::filter(ensembl_gene_id %in% groupName,
cluster %in% clusterName))
return(num)
}
countGeneList <- function(geneCluster, groupName){
n1 <- countGene(geneCluster, groupName, "Early")
n2 <- countGene(geneCluster, groupName, "Middle")
n3 <-countGene(geneCluster, groupName, "Late")
n4 <- countGene(geneCluster, groupName, "Transient")
return(c(n1, n2, n3, n4))
}
group <- c(rep("group1", 4), rep("group2", 4), rep("group3", 4),
rep("group4", 4), rep("group5", 4), rep("group6", 4),
rep("group7", 4), rep("group8", 4), rep("group9", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 9)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(countGeneList(geneCluster, group1),
countGeneList(geneCluster, group2),
countGeneList(geneCluster, group3),
countGeneList(geneCluster, group4),
countGeneList(geneCluster, group5),
countGeneList(geneCluster, group6),
countGeneList(geneCluster, group7),
countGeneList(geneCluster, group8),
countGeneList(geneCluster, group9))
data <- data.frame(group, cluster, value)
# Plotting
ggplot(data, aes(fill=cluster, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
# Statistical analysis
# Similar to the Chi-Square test, Fisher’s Exact Test is used when you have smaller sample sizes or when the expected frequency in any cell of the contingency table is below 5.
temp <- data %>% dplyr::filter(group %in% c("group1", "group2"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
temp <- data %>% dplyr::filter(group %in% c("group1", "group3"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
temp <- data %>% dplyr::filter(group %in% c("group1", "group4"))
contingency_table <- xtabs(value ~ group + cluster, data = temp)
fisher_result <- fisher.test(contingency_table, simulate.p.value = TRUE, B = 100000)
fisher_result
extractAnchor <- function(loop){
anchor1 <- loop %>% dplyr::select(c(1, 2, 3))
colnames(anchor1) <- c("chr", "start", "end")
anchor2 <- loop %>% dplyr::select(c(4, 5, 6))
colnames(anchor2) <- c("chr", "start", "end")
anchors <- reduce(makeGRangesFromDataFrame(bind_rows(anchor1, anchor2)))
return(anchors)
}
runFisherExact <- function(interest.gr, background.gr, query.gr){
overlaps_interest <- countOverlaps(interest.gr, query.gr)
a <- sum(overlaps_interest > 0)
total_interest <- length(interest.gr)
c <- total_interest - a
overlaps_background <- countOverlaps(background.gr, query.gr)
b <- sum(overlaps_background > 0)
total_background <- length(background.gr)
d <- total_background - b
# Construct contingency table
contingency_table <- matrix(c(a, c, b, d), nrow=2, byrow=TRUE,
dimnames=list("Region" = c("interest.gr", "background.gr"),
"Overlap" = c("Yes", "No")))
# Perform Fisher's exact test
fisher_result <- fisher.test(contingency_table)
return(fisher_result)
}
runFisherExactCombination <- function(interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr,
targetName,
retained.gr, lost.gr){
# SEEDING
temp <- runFisherExact(loop.up.gr, background.gr, retained.gr)
result.tb <- tibble(interest = paste0(interestName, "_UP"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.no.gr, background.gr, retained.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_NO"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_DOWN"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.up.gr, background.gr, lost.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_UP"),
target = paste0(targetName, "_lost"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.no.gr, background.gr, lost.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_NO"),
target = paste0(targetName, "_lost"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_DOWN"),
target = paste0(targetName, "_lost"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
return(result.tb)
}
runFisherExactCombinationBinary <- function(interestName,
loop.upno.gr, loop.down.gr,
background.gr,
targetName,
retained.gr, lost.gr){
# SEEDING
temp <- runFisherExact(loop.upno.gr, background.gr, retained.gr)
result.tb <- tibble(interest = paste0(interestName, "_UP/NO"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_DOWN"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.upno.gr, background.gr, lost.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_UP/NO"),
target = paste0(targetName, "_lost"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_DOWN"),
target = paste0(targetName, "_lost"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
return(result.tb)
}
runFisherExactCombinationRetained <- function(interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr,
targetName,
retained.gr){
# SEEDING
temp <- runFisherExact(loop.up.gr, background.gr, retained.gr)
result.tb <- tibble(interest = paste0(interestName, "_UP"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.no.gr, background.gr, retained.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_NO"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_DOWN"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
return(result.tb)
}
runFisherExactCombinationTarget <- function(targetName, interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr){
# RETAINED loop
retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(retained) <- c("chr", "start", "end")
retained.gr <- makeGRangesFromDataFrame(retained)
# LOST loop
lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(lost) <- c("chr", "start", "end")
lost.gr <- makeGRangesFromDataFrame(lost)
result <- runFisherExactCombination(interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr,
targetName,
retained.gr, lost.gr)
return(result)
}
runFisherExactCombinationTargetBinary <- function(targetName, interestName,
loop.upno.gr, loop.down.gr,
background.gr){
# RETAINED loop
retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(retained) <- c("chr", "start", "end")
retained.gr <- makeGRangesFromDataFrame(retained)
# LOST loop
lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(lost) <- c("chr", "start", "end")
lost.gr <- makeGRangesFromDataFrame(lost)
result <- runFisherExactCombinationBinary(interestName,
loop.upno.gr, loop.down.gr,
background.gr,
targetName,
retained.gr, lost.gr)
return(result)
}
runFisherExactCombinationTargetRetained <- function(targetName, interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr){
# RETAINED loop
retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(retained) <- c("chr", "start", "end")
retained.gr <- makeGRangesFromDataFrame(retained)
# LOST loop
result <- runFisherExactCombinationRetained(interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr,
targetName,
retained.gr)
return(result)
}
#### Importing loops of interest
interestName <- "allLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loopNum <- nrow(loop.no)
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
loop.down <- loop.all %>% dplyr::filter(diff_dTAG_DMSO < -0.2) %>% dplyr::arrange(diff_dTAG_DMSO) %>% slice_head(n = loopNum)
loop.down.gr <- (extractAnchor(loop.down))
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTarget("ESRRB_NCB", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTarget("ESRRB_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTarget("NANOG_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp11 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp12 <- runFisherExactCombinationTarget("OCT4_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp13 <- runFisherExactCombinationTarget("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp14 <- runFisherExactCombinationTarget("SOX2_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp15 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp16 <- runFisherExactCombinationTarget("SOX2_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp17 <- runFisherExactCombinationTarget("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10,
temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 5),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5),
labels = c("0", "1", "2", "3", "4", "5")))
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 3),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3),
labels = c("0", "1", "2", "3")))
#### Importing loops of interest
interestName <- "regLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# UPNO loop
loop.upno <- bind_rows(loop.up, loop.no)
loop.upno.gr <- extractAnchor(loop.upno)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))
targetName <- "H3K27ac_effie"
# RETAINED loop
retained <- fread(here(refDir, "TF_bookmarking", targetName, "am_a.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(retained) <- c("chr", "start", "end")
retained.gr <- makeGRangesFromDataFrame(retained)
# LOST loop
lost <- fread(here(refDir, "TF_bookmarking", targetName, "oa.bed")) %>% dplyr::select(c(1, 2, 3))
colnames(lost) <- c("chr", "start", "end")
lost.gr <- makeGRangesFromDataFrame(lost)
# SEEDING
temp <- runFisherExact(loop.upno.gr, background.gr, retained.gr)
result.tb <- tibble(interest = paste0(interestName, "_UP/NO"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, retained.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_DOWN"),
target = paste0(targetName, "_retained"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.upno.gr, background.gr, lost.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_UP/NO"),
target = paste0(targetName, "_lost"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# ADDING ROWS
temp <- runFisherExact(loop.down.gr, background.gr, lost.gr)
result.tb <- result.tb %>%
add_row(interest = paste0(interestName, "_DOWN"),
target = paste0(targetName, "_lost"),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
result.tb <- result.tb %>%
mutate(interest = recode(interest,
"regLoop_UP/NO" = "UP/NO",
"regLoop_DOWN" = "DOWN"),
target = recode(target,
"H3K27ac_effie_retained" = "Retained",
"H3K27ac_effie_lost" = "Lost"))
result.tb$interest <- factor(result.tb$interest, levels = c("UP/NO", "DOWN"))
p <- ggplot(result.tb, aes(x = interest, y = target, size = -log10(pvalue), fill = oddsRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 0.5*ptToMM # Line width for the border
) + theme_bw() +
scale_size_continuous(range = c(1, 3)) + # Set min and max point sizes here
scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"), # Define gradient colors
values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5),
#low = "white", high = "#CB333A",
# limits = c(1, 3),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "heatmap_H3K27ac_bookmarking_reg_dotplot")
width <- panelSize(1.7)*mmToInch
height <- panelSize(1.1)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
#### Importing loops of target
temp1 <- runFisherExactCombinationTargetBinary("ESRRB_dsg", interestName,
loop.upno.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTargetBinary("H3K27ac_effie", interestName,
loop.upno.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTargetBinary("KLF4_effie", interestName,
loop.upno.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTargetBinary("OCT4_effie", interestName,
loop.upno.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTargetBinary("SOX2_effie", interestName,
loop.upno.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5)
result.tb <- data %>%
mutate(interest = recode(interest,
"regLoop_UP/NO" = "UP/NO",
"regLoop_DOWN" = "DOWN"),
target = recode(target,
"H3K27ac_effie_retained" = "H3K27ac_retained",
"H3K27ac_effie_lost" = "H3K27ac_lost",
"ESRRB_dsg_retained" = "ESRRB_retained",
"ESRRB_dsg_lost" = "ESRRB_lost",
"KLF4_effie_retained" = "KLF4_retained",
"KLF4_effie_lost" = "KLF4_lost",
"OCT4_effie_retained" = "OCT4_retained",
"OCT4_effie_lost" = "OCT4_lost",
"SOX2_effie_retained" = "SOX2_retained",
"SOX2_effie_lost" = "SOX2_lost"))
result.tb$interest <- factor(result.tb$interest, levels = c("UP/NO", "DOWN"))
result.tb$target <- factor(result.tb$target, levels = rev(c("H3K27ac_retained", "H3K27ac_lost",
"ESRRB_retained", "ESRRB_lost",
"KLF4_retained", "KLF4_lost",
"OCT4_retained", "OCT4_lost",
"SOX2_retained","SOX2_lost")))
p <- ggplot(result.tb, aes(x = interest, y = target, size = -log10(pvalue), fill = oddsRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM # Line width for the border
) + theme_bw() +
scale_size_continuous(range = c(1, 3)) + # Set min and max point sizes here
scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"), # Define gradient colors
values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5),
#low = "white", high = "#CB333A",
# limits = c(1, 3),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "heatmap_alltarget_bookmarking_reg_dotplot")
width <- panelSize(2.2)*mmToInch
height <- panelSize(2)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTarget("ESRRB_NCB", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTarget("ESRRB_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTarget("NANOG_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp11 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp12 <- runFisherExactCombinationTarget("OCT4_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp13 <- runFisherExactCombinationTarget("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp14 <- runFisherExactCombinationTarget("SOX2_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp15 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp16 <- runFisherExactCombinationTarget("SOX2_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp17 <- runFisherExactCombinationTarget("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10,
temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 5),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5),
labels = c("0", "1", "2", "3", "4", "5")))
#### Importing loops of target
temp1 <- runFisherExactCombinationTargetRetained("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTargetRetained("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTargetRetained("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTargetRetained("ESRRB_NCB", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTargetRetained("ESRRB_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTargetRetained("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTargetRetained("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTargetRetained("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTargetRetained("NANOG_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTargetRetained("OCT4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp11 <- runFisherExactCombinationTargetRetained("OCT4_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp12 <- runFisherExactCombinationTargetRetained("OCT4_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp13 <- runFisherExactCombinationTargetRetained("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp14 <- runFisherExactCombinationTargetRetained("SOX2_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp15 <- runFisherExactCombinationTargetRetained("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp16 <- runFisherExactCombinationTargetRetained("SOX2_pfa", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp17 <- runFisherExactCombinationTargetRetained("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10,
temp11, temp12, temp13, temp14, temp15, temp16 ,temp17)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 3),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3),
labels = c("0", "1", "2", "3")))
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 3),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3),
labels = c("0", "1", "2", "3")))
#### Importing loops of target
temp1 <- runFisherExactCombinationTargetRetained("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTargetRetained("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTargetRetained("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTargetRetained("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTargetRetained("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTargetRetained("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTargetRetained("OCT4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTargetRetained("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTargetRetained("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTargetRetained("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 3),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3),
labels = c("0", "1", "2", "3")))
#### Importing loops of interest
interestName <- "allLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 3),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3),
labels = c("0", "1", "2", "3")))
#### Importing loops of interest
interestName <- "regLoop"
# BACKGROUND loop
background <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
background.gr <- (extractAnchor(background))
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
loop.up.gr <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
loop.no.gr <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
loop.down.gr <- (extractAnchor(loop.down))
fwrite(as_tibble(loop.up.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
fwrite(as_tibble(loop.no.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
fwrite(as_tibble(loop.down.gr) %>% dplyr::select(seqnames, start, end), here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.anchor.bed"), col.names = FALSE, sep = "\t")
#### Importing loops of target
temp1 <- runFisherExactCombinationTarget("ATAC_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp2 <- runFisherExactCombinationTarget("CTCF", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp3 <- runFisherExactCombinationTarget("ESRRB_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp4 <- runFisherExactCombinationTarget("H3K27ac_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp5 <- runFisherExactCombinationTarget("KLF4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp6 <- runFisherExactCombinationTarget("NANOG_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp7 <- runFisherExactCombinationTarget("OCT4_effie", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp8 <- runFisherExactCombinationTarget("SMC1", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp9 <- runFisherExactCombinationTarget("SOX2_dsg", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
temp10 <- runFisherExactCombinationTarget("TBP", interestName,
loop.up.gr, loop.no.gr, loop.down.gr,
background.gr)
data <- bind_rows(temp1, temp2, temp3, temp4, temp5,
temp6, temp7, temp8, temp9, temp10)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 3),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3),
labels = c("0", "1", "2", "3")))
2.5kb from TSS, at least 1 bp overlap #### 2i
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
TRUE ~ "NO"))
temp <- diff.RNA %>% dplyr::mutate(start = TSS - 2500,
end = TSS + 2500) %>%
dplyr::select(chr, start, end, ensembl_gene_id, diff)
down.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN"))
up.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "UP"))
no.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "NO"))
all.gr <- makeGRangesFromDataFrame(temp)
data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
up.gr, no.gr, down.gr,
all.gr)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 2),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2),
labels = c("0", "1", "2")))
## GO TEST
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
TRUE ~ "NO"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "DOWN"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
downStemGene <- unique(unlist((as.data.frame(GO) %>% dplyr::slice(c(4, 5, 14, 66)) %>% dplyr::mutate(geneID = strsplit(geneID, "/")))$geneID))
downStem.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(ensembl_gene_id %in% downStemGene))
print(dotplot(GO, showCategory = 15, title = "") +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
downNONStem.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN",
!(ensembl_gene_id %in% downStemGene)))
print(dotplot(GO, showCategory = 15, title = "") +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
### Checking bookmarking only in stem gene
data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
down.gr, downNONStem.gr, downStem.gr,
all.gr)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 2),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2),
labels = c("0", "1", "2")))
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.Epi.A485_vs_G1.Epi.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
TRUE ~ "NO"))
temp <- diff.RNA %>% dplyr::mutate(start = TSS - 2500,
end = TSS + 2500) %>%
dplyr::select(chr, start, end, ensembl_gene_id, diff)
down.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "DOWN"))
up.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "UP"))
no.gr <- makeGRangesFromDataFrame(temp %>% dplyr::filter(diff == "NO"))
all.gr <- makeGRangesFromDataFrame(temp)
data <- runFisherExactCombinationTarget("H3K27ac_effie", "TSS",
up.gr, no.gr, down.gr,
all.gr)
# Visualization
library(circlize)
heatmap_data <- data %>% dplyr::select(target, interest, oddsRatio) %>%
pivot_wider(names_from = interest, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, interest, pvalue) %>%
pivot_wider(names_from = interest, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 2),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2),
labels = c("0", "1", "2")))
## GO
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.Epi.A485_vs_G1.Epi.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
TRUE ~ "NO"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "DOWN"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
geneList <- (diff.RNA %>% dplyr::filter(diff == "UP"))$ensembl_gene_id
GO <- enrichGO(gene = geneList, OrgDb = org.Mm.eg.db, keyType = "ENSEMBL", ont = "BP")
GO.readable <- setReadable(GO, OrgDb = org.Mm.eg.db)
print(dotplot(GO, showCategory = 15, title = "") +
scale_color_continuous(limits = c(0, 0.05), low = "red", high = "black"))
This doesn’t have to be restricted to P-N loops. Let’s check all loops first. Also instead of density (mean), median could be better choice. Sum is not appropriate here since the size of anchors are not same. #### Limited to peaks to reduce noise ##### dTAG
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(loop ==group1) )$sumScore
distance2 <- (data %>% dplyr::filter(loop ==group2) )$sumScore
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
name <- "chromo_cons_annoHierarchy"
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# UP NO
loop.upno <- bind_rows(loop.up, loop.no)
anchor.upno <- (extractAnchor(loop.upno))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
getSumScores <- function(track, anchor) {
# Find overlaps between all anchors and track regions at once
overlaps <- findOverlaps(anchor, track)
# Extract the scores and corresponding anchor indices
anchor_indices <- queryHits(overlaps)
track_scores <- score(track)[subjectHits(overlaps)]
# Use tapply to calculate the median scores for each anchor
median_scores <- tapply(track_scores, anchor_indices, mean, na.rm = TRUE)
# Initialize a numeric vector to store the median scores for each anchor
all_median_scores <- rep(NA, length(anchor))
# Populate the median scores for the anchors that have overlaps
all_median_scores[as.numeric(names(median_scores))] <- median_scores
return(all_median_scores)
}
plotSumScores <- function(track, peak, name){
peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
a <- getSumScores(peakTrack, anchor.up)
b <- getSumScores(peakTrack, anchor.no)
c <- getSumScores(peakTrack, anchor.down)
a.tb <- tibble(loop = "UP",
sumScore = a)
b.tb <- tibble(loop = "NO",
sumScore = b)
c.tb <- tibble(loop = "DOWN",
sumScore = c)
data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
ggplot(data, aes(x = loop, y = sumScore)) +
labs(x = NULL, y = paste0(name, " average peak score per anchor")) +
geom_violin(aes(fill = group), color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.size = 1, outlier.stroke = NA) + theme_classic() +
stat_summary(
aes(group = loop), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) +
coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.95))) + theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
}
plotSumScoresBinary <- function(track, peak, name, anchor.upno, anchor.down){
peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
b <- getSumScores(peakTrack, anchor.upno)
c <- getSumScores(peakTrack, anchor.down)
b.tb <- tibble(loop = "UP/NO",
sumScore = b)
c.tb <- tibble(loop = "DOWN",
sumScore = c)
data <- bind_rows(b.tb, c.tb) %>% drop_na()
data$loop <- factor(data$loop, levels = c("UP/NO", "DOWN"))
p12 <- getPvalWilcox(data, "UP/NO", "DOWN")
p <- ggplot(data, aes(x = name, fill = loop, y = sumScore)) +
labs(x = NULL, y = paste0("Average ChIP peak score at anchor)")) +
introdataviz::geom_split_violin(linewidth = lineThick * mmToLineUnit, lineend = "square",
alpha = .4) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) + theme_classic() +
stat_summary(
aes(group = loop), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) + theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 0,
),
axis.text.y = element_text(
size = fontSizeS,
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +
coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.9))) +
annotate(
"text", x = 1, y = quantile(data$sumScore, 0.5),
label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3
) + guides(
fill = guide_legend(
keywidth = 0.2, # Adjust the width of the legend keys
keyheight = 0.2 # Adjust the height of the legend keys
)
)
fileName <- paste0("ChIP_peak_avgPeakScore_", name)
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.2)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "H3K4me3")
plotSumScoresBinary(track, peak, "H3K4me3", anchor.upno, anchor.down)
track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
#plotSumScores(track, peak, "H3K27ac")
plotSumScoresBinary(track, peak, "H3K27ac", anchor.upno, anchor.down)
track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
#plotSumScores(track, peak, "H3K14ac")
plotSumScoresBinary(track, peak, "H3K14ac", anchor.upno, anchor.down)
track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "CTCF")
plotSumScoresBinary(track, peak, "CTCF", anchor.upno, anchor.down)
track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "RAD21")
plotSumScoresBinary(track, peak, "RAD21", anchor.upno, anchor.down)
name <- "chromo_cons_annoHierarchy"
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
getSumScores <- function(track, anchor) {
# Find overlaps between all anchors and track regions at once
overlaps <- findOverlaps(anchor, track)
# Extract the scores and corresponding anchor indices
anchor_indices <- queryHits(overlaps)
track_scores <- score(track)[subjectHits(overlaps)]
# Use tapply to calculate the median scores for each anchor
median_scores <- tapply(track_scores, anchor_indices, sum, na.rm = TRUE)
# Initialize a numeric vector to store the median scores for each anchor
all_median_scores <- rep(NA, length(anchor))
# Populate the median scores for the anchors that have overlaps
all_median_scores[as.numeric(names(median_scores))] <- median_scores
return(all_median_scores)
}
plotSumScores <- function(track, peak, name){
peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
a <- getSumScores(peakTrack, anchor.up)
b <- getSumScores(peakTrack, anchor.no)
c <- getSumScores(peakTrack, anchor.down)
a.tb <- tibble(loop = "UP",
sumScore = a)
b.tb <- tibble(loop = "NO",
sumScore = b)
c.tb <- tibble(loop = "DOWN",
sumScore = c)
data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
ggplot(data, aes(x = loop, y = sumScore)) + geom_boxplot(outlier.shape = NA) + theme_classic() + ggtitle(paste0(name, " sum peak")) +
coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
}
# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
plotSumScores(track, peak, "H3K4me3")
track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
plotSumScores(track, peak, "H3K27ac")
track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
plotSumScores(track, peak, "H3K14ac")
track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
plotSumScores(track, peak, "CTCF")
track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak.temp <- fread(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed")) %>%
dplyr::mutate(V2 = V2 - 1000,
V3 = V3 + 1000)
colnames(peak.temp) <- c("chr", "start", "end")
peak <- makeGRangesFromDataFrame(peak.temp)
plotSumScores(track, peak, "YY1")
### For those with only summit
track <- import(here(refDir, "GSM5571895_ESC_YY1_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM5571895_ESC_YY1_summit.bed"))
plotSumScores(track, peak, "RAD21")
### FOR THOSE WITHOUT PEAKS
track <- import(here(refDir, "GSM2082708_ESC.H3K27me3.1_mm10Lifted.black.bw"), format = "BigWig")
a <- getSumScores(track, anchor.up)
b <- getSumScores(track, anchor.no)
c <- getSumScores(track, anchor.down)
a.tb <- tibble(loop = "UP",
sumScore = a)
b.tb <- tibble(loop = "NO",
sumScore = b)
c.tb <- tibble(loop = "DOWN",
sumScore = c)
data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
ggplot(data, aes(x = loop, y = sumScore)) + geom_boxplot(outlier.shape = NA) + theme_classic() + ggtitle(paste0("H3K27me3 sum no peak")) +
coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
name <- "chromo_cons_annoHierarchy"
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
peak.H3K4me3 <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
peak.temp <- fread(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed")) %>%
dplyr::mutate(V2 = V2 - 1000,
V3 = V3 + 1000)
colnames(peak.temp) <- c("chr", "start", "end")
peak.RAD21 <- makeGRangesFromDataFrame(peak.temp)
calculatePeakDensity <- function(peak, note){
overlap_counts <- countOverlaps(anchor.up, peak)
anchor_widths <- (width(anchor.up)-1)/1000
density <- overlap_counts / anchor_widths
data1 <- tibble(type = "UP",
densityPerKb = density)
overlap_counts <- countOverlaps(anchor.no, peak)
anchor_widths <- (width(anchor.no)-1)/1000
density <- overlap_counts / anchor_widths
mcols(anchor.no)$density <- density
data2 <- tibble(type = "NO",
densityPerKb = density)
overlap_counts <- countOverlaps(anchor.down, peak)
anchor_widths <- (width(anchor.down)-1)/1000
density <- overlap_counts / anchor_widths
mcols(anchor.down)$density <- density
data3 <- tibble(type = "DOWN",
densityPerKb = density)
data <- bind_rows(data1, data2, data3)
ggplot(data, aes(x = type, y = densityPerKb)) + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1 ) + theme_bw() + ggtitle(note) +
coord_cartesian(ylim = c(0, quantile(data$sumScore, 0.9)))
}
calculatePeakDensity(peak.H3K4me3, "H3K4me3")
calculatePeakDensity(peak.H3K27ac, "H3K27ac")
calculatePeakDensity(peak.CTCF, "CTCF")
calculatePeakDensity(peak.RAD21, "RAD21")
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
anchor <- extractAnchor(loop)
### Whyte
n <- length(unique(subjectHits(findOverlaps(anchor, peak.Whyte.SE))))
total <- length(peak.Whyte.SE)
counts <- c(n, total - n)
labels <- paste(c("covered", "NOT covered"), counts)
pie(counts, labels = labels, main = paste0("Whyte SE coverage \n", total), col = c("grey", "white"))
fileName <- here(figDir, "pie_SE_Whyte")
width <- panelSize(4)*mmToInch
height <- panelSize(4)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# pie(counts, labels = labels, main = paste0("Whyte SE coverage \n", total), col = c("grey", "white"),
# cex = 1, cex.main = 1)
# dev.off()
### Dylan
n <- length(unique(subjectHits(findOverlaps(anchor, peak.Dylan.SE))))
total <- length(peak.Dylan.SE)
counts <- c(n, total - n)
labels <- paste(c("covered", "NOT covered"), counts)
pie(counts, labels = labels, main = paste0("Dylan SE coverage \n", total), col = c("grey", "white"))
fileName <- here(figDir, "pie_SE_Murphy")
width <- panelSize(4)*mmToInch
height <- panelSize(4)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# pie(counts, labels = labels, main = paste0("Murphy SE coverage \n", total), col = c("grey", "white"))
#
# dev.off()
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
getOverlapLoopNum <- function(loop, peak){
anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
a <- queryHits(findOverlaps(anchor1, peak))
b <- queryHits(findOverlaps(anchor2, peak))
return(length(unique(c(a, b))))
}
getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
all.overlap <- getOverlapLoopNum(allLoop, peak)
all.notOverlap <- nrow(allLoop) - all.overlap
subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
subset.notOverlap <- nrow(subsetLoop) - subset.overlap
contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
rownames(contingency_table) <- c("All loops", "Subset loops")
# Perform Fisher's Exact Test
fisher_test_result <- fisher.test(contingency_table)
return(fisher_test_result)
}
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.upno <- bind_rows(loop.up, loop.no)
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
### Dylan
# Seeding
temp <- getSEOverlapFisher(loop, loop.upno, peak.Dylan.SE)
result.tb <- tibble(loopType = "UP/NO",
target = "Dylan SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# temp <- getSEOverlapFisher(loop, loop.up, peak.Dylan.SE)
# result.tb <- tibble(loopType = "UP",
# target = "Dylan SE",
# pvalue = temp$p.value,
# oddsRatio = temp$estimate)
# Add row
# temp <- getSEOverlapFisher(loop, loop.no, peak.Dylan.SE)
# result.tb <- result.tb %>%
# add_row(loopType = "NO",
# target = "Dylan SE",
# pvalue = temp$p.value,
# oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, peak.Dylan.SE)
result.tb <- result.tb %>%
add_row(loopType = "DOWN",
target = "Dylan SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
### Whyte
# Seeding
# temp <- getSEOverlapFisher(loop, loop.up, peak.Whyte.SE)
# result.tb <- result.tb %>%
# add_row(loopType = "UP",
# target = "Whyte SE",
# pvalue = temp$p.value,
# oddsRatio = temp$estimate)
# Add row
temp <- getSEOverlapFisher(loop, loop.upno, peak.Whyte.SE)
result.tb <- result.tb %>%
add_row(loopType = "UP/NO",
target = "Whyte SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, peak.Whyte.SE)
result.tb <- result.tb %>%
add_row(loopType = "DOWN",
target = "Whyte SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# Visualization
library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
pivot_wider(names_from = loopType, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 2),
c("#4852A0", "white", "#CB333A"))
p <- Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = fontSizeS, fontfamily = fontType))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
row_names_gp = gpar(fontsize = fontSizeS, fontfamily = fontType),
column_names_gp = gpar(fontsize = fontSizeS, fontfamily = fontType),
heatmap_legend_param = list(
at = c(0, 1, 2),
labels = c("0", "1", "2"),
title_gp = gpar(fontfamily = fontType, fontsize = fontSizeS),
labels_gp = gpar(fontfamily = fontType, fontsize = fontSizeS)
)
)
# fileName <- here(figDir, "heatmap_SE_enrichment")
# width <- panelSize(1.5)*mmToInch
# height <- panelSize(0.7)*mmToInch
# svglite(paste0(fileName, ".svg"), width = width, height =height)
# print(p)
# dev.off()
# png(paste0(fileName, ".png"), width = width, height =height, res = 600, unit = "in")
# print(p)
# dev.off()
#######################
data$loopType <- factor(data$loopType, levels = c("UP/NO", "DOWN"))
data$target <- factor(data$target, levels = c("Whyte SE", "Dylan SE"))
p <- ggplot(data, aes(x = loopType, y = target, size = -log10(pvalue), fill = oddsRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM # Line width for the border
) + theme_bw() +
scale_size_continuous(range = c(1, 3)) + # Set min and max point sizes here
scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"), # Define gradient colors
values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5),
#low = "white", high = "#CB333A",
# limits = c(1, 3),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) +
labs(x = NULL, y = NULL) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "heatmap_SE_enrichment_dotplot")
width <- panelSize(1.8)*mmToInch
height <- panelSize(1.1)*mmToInch
# png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# print(p)
# dev.off()
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
getOverlapLoopNum <- function(loop, peak){
anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
a <- queryHits(findOverlaps(anchor1, peak))
b <- queryHits(findOverlaps(anchor2, peak))
return(length(unique(c(a, b))))
}
getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
all.overlap <- getOverlapLoopNum(allLoop, peak)
all.notOverlap <- nrow(allLoop) - all.overlap
subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
subset.notOverlap <- nrow(subsetLoop) - subset.overlap
contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
rownames(contingency_table) <- c("All loops", "Subset loops")
# Perform Fisher's Exact Test
fisher_test_result <- fisher.test(contingency_table)
return(fisher_test_result)
}
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
### Dylan
# Seeding
temp <- getSEOverlapFisher(loop, loop.up, peak.Dylan.SE)
result.tb <- tibble(loopType = "UP",
target = "Dylan SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# Add row
temp <- getSEOverlapFisher(loop, loop.no, peak.Dylan.SE)
result.tb <- result.tb %>%
add_row(loopType = "NO",
target = "Dylan SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, peak.Dylan.SE)
result.tb <- result.tb %>%
add_row(loopType = "DOWN",
target = "Dylan SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
### Whyte
# Seeding
temp <- getSEOverlapFisher(loop, loop.up, peak.Whyte.SE)
result.tb <- result.tb %>%
add_row(loopType = "UP",
target = "Whyte SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# Add row
temp <- getSEOverlapFisher(loop, loop.no, peak.Whyte.SE)
result.tb <- result.tb %>%
add_row(loopType = "NO",
target = "Whyte SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, peak.Whyte.SE)
result.tb <- result.tb %>%
add_row(loopType = "DOWN",
target = "Whyte SE",
pvalue = temp$p.value,
oddsRatio = temp$estimate)
# Visualization
library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
pivot_wider(names_from = loopType, values_from = pvalue) %>%
column_to_rownames(var = "target")
col_fun <- colorRamp2(c(0, 1, 5),
c("blue", "white", "red"))
Heatmap(as.matrix(heatmap_data),
name = "Odds Ratio",
col = col_fun,
# Add annotation for p-values
cell_fun = function(j, i, x, y, width, height, fill) {
pval <- pvalue_data[i, j]
label <- ifelse(pval > 0.05, "n.s.", sprintf("%.2e", pval))
grid.text(label, x, y, gp = gpar(fontsize = 10))
},
# Customize the heatmap layout
cluster_rows = TRUE,
show_row_dend = FALSE,
cluster_columns = FALSE,
column_title = "Interest",
row_title = "Target",
heatmap_legend_param = list(at = c(0, 1, 2, 3, 4, 5),
labels = c("0", "1", "2", "3", "4", "5")))
The question I want to ask here is whether perturbed loops in dTAG experiments are either perturbed or not perturbed in A485 experiment. If there is compensation going on between RAD21 and A485, UP loop in RAD21 should be more DOWN in A485 and vice versa #### Exploratory part
diffCutoff <- 0.2
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
temp <- data
temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
temp <- temp %>% dplyr::arrange(density)
p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
geom_hline(yintercept = diffCutoff, alpha = 1, color = "black") +
geom_hline(yintercept = -diffCutoff, alpha = 1, color = "black") +
geom_vline(xintercept = diffCutoff, alpha = 1, color = "black") +
geom_vline(xintercept = -diffCutoff, alpha = 1, color = "black") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) +
theme_classic()
fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_dTAG-", diffName)
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
print(p1)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 3.5, height = 3)
print(p1)
dev.off()
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(diff_A485_DMSO > diffCutoff, "UP",
ifelse(diff_A485_DMSO > -diffCutoff, "NO", "DOWN")))
makeAcrossSampleScatterplotdTAG <- function(data, AnnoList, diff, name, loopName, diffName){
temp <- data %>% dplyr::filter(Anno2 %in% AnnoList,
updown_dTAG_DMSO %in% diff)
temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
temp <- temp %>% dplyr::arrange(density)
p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) +
theme_classic() + ggtitle(paste0(name, "_", loopName, "_dTAG-", diffName)) + theme(plot.title = element_text(size = 5))
fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_dTAG-", diffName)
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
print(p1)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 3.5, height = 3)
print(p1)
dev.off()
}
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("UP", "NO", "DOWN"), name, "all", "all")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("UP"), name, "all", "UP")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("NO"), name, "all", "NO")
makeAcrossSampleScatterplotdTAG(data, unique(data$Anno2), c("DOWN"), name, "all", "DOWN")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("UP", "NO", "DOWN"), name, "pe-pe", "all")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("UP"), name, "pe-pe", "UP")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("NO"), name, "pe-pe", "NO")
makeAcrossSampleScatterplotdTAG(data, c("P-P", "P-E", "E-E"), c("DOWN"), name, "pe-pe", "DOWN")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("UP", "NO", "DOWN"), name, "str", "all")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("UP"), name, "str", "UP")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("NO"), name, "str", "NO")
makeAcrossSampleScatterplotdTAG(data, c("S-S", "S-X"), c("DOWN"), name, "str", "DOWN")
makeAcrossSampleScatterplotA485 <- function(data, AnnoList, diff, name, loopName, diffName){
temp <- data %>% dplyr::filter(Anno2 %in% AnnoList,
updown_A485_DMSO %in% diff)
temp$density <- get_density(temp$diff_dTAG_DMSO, temp$diff_A485_DMSO, n = 100)
temp <- temp %>% dplyr::arrange(density)
p1 <- ggplot(temp, aes(x = diff_dTAG_DMSO, y = diff_A485_DMSO, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +coord_fixed(ratio = 1, ylim = c(-1, 1), xlim = c(-1, 1)) +
theme_classic() + ggtitle(paste0(name, "_", loopName, "_A485G-", diffName)) + theme(plot.title = element_text(size = 5))
fileName <- paste0("scatterplot_", name, "_", diffCutoff, "_diff_", loopName, "_A485-", diffName)
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 3.5, height = 3)
print(p1)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 3.5, height = 3)
print(p1)
dev.off()
}
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("UP", "NO", "DOWN"), name, "all", "all")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("UP"), name, "all", "UP")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("NO"), name, "all", "NO")
makeAcrossSampleScatterplotA485(data, unique(data$Anno2), c("DOWN"), name, "all", "DOWN")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("UP", "NO", "DOWN"), name, "pe-pe", "all")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("UP"), name, "pe-pe", "UP")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("NO"), name, "pe-pe", "NO")
makeAcrossSampleScatterplotA485(data, c("P-P", "P-E", "E-E"), c("DOWN"), name, "pe-pe", "DOWN")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("UP", "NO", "DOWN"), name, "str", "all")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("UP"), name, "str", "UP")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("NO"), name, "str", "NO")
makeAcrossSampleScatterplotA485(data, c("S-S", "S-X"), c("DOWN"), name, "str", "DOWN")
######## Barplot
temp <- data %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO)
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 0.2, linetype = "dashed") +
geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))
temp <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO)
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 0.2, linetype = "dashed") +
geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))
temp <- data %>% dplyr::filter(Anno2 %in% c("S-S", "S-X")) %>% dplyr::select(updown_dTAG_DMSO, diff_A485_DMSO)
ggplot(temp, aes(x = updown_dTAG_DMSO, y = diff_A485_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 0.2, linetype = "dashed") +
geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))
###
temp <- data %>% dplyr::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 0.2, linetype = "dashed") +
geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))
temp <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>% dplyr:::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 0.2, linetype = "dashed") +
geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))
temp <- data %>% dplyr::filter(Anno2 %in% c("S-S", "S-X")) %>% dplyr:::select(updown_A485_DMSO, diff_dTAG_DMSO)
ggplot(temp, aes(x = updown_A485_DMSO, y = diff_dTAG_DMSO)) + geom_boxplot(outlier.shape = NA) + theme_classic() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 0.2, linetype = "dashed") +
geom_hline(yintercept = -0.2, linetype = "dashed") + coord_cartesian(ylim = c(-0.5, 0.5))
Q. What are the changes in compartment level upon A485 treatment? #### Compartment change distribution
### IMPORTING COMPARMTNET SCORES
compDir <- here("../..", "result", "compartment", "CscoreTools")
cscore.DMSO <- as_tibble(fread(here(compDir, "G1DMSO_Merged_10kb_cscore_final.bedgraph"), skip = 1)) %>%
dplyr::mutate(V2 = V2 + 1)
colnames(cscore.DMSO) <- c("chr", "start", "end", "cscore_DMSO")
cscore.A485 <- as_tibble(fread(here(compDir, "G1A485_Merged_10kb_cscore_final.bedgraph"), skip = 1)) %>%
dplyr::mutate(V2 = V2 + 1)
colnames(cscore.A485) <- c("chr", "start", "end", "cscore_A485")
cscore <- dplyr::full_join(cscore.DMSO, cscore.A485, by = c("chr", "start", "end"))
# FILTER ROWS WITH NA
cscore <- cscore %>% filter(!if_any(everything(), is.na))
# Annotating how the compartment changed
threshold <- 0.1
cscore <- cscore %>% rowwise() %>% dplyr::mutate(isAB_DMSO = ifelse(cscore_DMSO > 0, "A", "B"),
isAB_A485 = ifelse(cscore_A485 > 0, "A", "B"),
changeType = case_when(
abs(cscore_DMSO - cscore_A485) < threshold ~ "Unchanged",
isAB_DMSO == "A" & isAB_A485 == "B" ~ "AtoB",
isAB_DMSO == "B" & isAB_A485 == "A" ~ "BtoA",
isAB_DMSO == "A" & isAB_A485 == "A" & abs(cscore_DMSO) > abs(cscore_A485) ~ "A_weakening",
isAB_DMSO == "A" & isAB_A485 == "A" & abs(cscore_DMSO) <= abs(cscore_A485) ~ "A_strengthening",
isAB_DMSO == "B" & isAB_A485 == "B" & abs(cscore_DMSO) > abs(cscore_A485) ~ "B_weakening",
isAB_DMSO == "B" & isAB_A485 == "B" & abs(cscore_DMSO) <= abs(cscore_A485) ~ "B_strengthening" ))
cscore_summary <- tibble(comparison = rep("A485_vs_DMSO", 7),
changeType = c("AtoB", "BtoA", "A_weakening", "A_strengthening", "B_weakening", "B_strengthening", "Unchanged"),
value = c(sum(cscore$changeType == "AtoB"),
sum(cscore$changeType == "BtoA"),
sum(cscore$changeType == "A_weakening"),
sum(cscore$changeType == "A_strengthening"),
sum(cscore$changeType == "B_weakening"),
sum(cscore$changeType == "B_strengthening"),
sum(cscore$changeType == "Unchanged")))
cscore_summary$changeType <- factor(cscore_summary$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
"B_weakening", "BtoA", "A_strengthening",
"Unchanged"))
ggplot(cscore_summary, aes(x = comparison, y = value, fill = changeType)) + geom_bar(position = "stack", stat = "identity") +
theme_bw() +
scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V1, TSS, V6)
colnames(gene.tb) <- c("chr", "TSS", "ensembl")
diff.RNA <- fread(here(refDir, "diff_G1.A485.selected2_G1.2i.A485_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
diff.RNA <- diff.RNA %>% dplyr::left_join(gene.tb, by = c("ensembl_gene_id" = "ensembl")) %>%
dplyr::filter(!is.na(TSS))
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA <- diff.RNA %>% dplyr::mutate(diff = case_when(padj < alpha & shrinked_log2FC > fcCutoff ~ "UP",
padj < alpha & shrinked_log2FC < -fcCutoff ~ "DOWN",
TRUE ~ "NO"))
getCompChangeType <- function(chrom, TSS, cscore.tb){
temp.tb <- cscore.tb %>% dplyr::filter(chr == chrom, start < TSS, end > TSS)
out <- temp.tb$changeType
if(length(out) < 1){
return(NA)
}else{
return(out)
}
}
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
temp <-diff.RNA %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
dplyr::select(chr, start, end)
diff.RNA.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(diff.RNA.gr, cscore.gr)
#cscore.selected <- cscore %>% dplyr::slice(subjectHits(overlap))
## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(diff.RNA[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
missed <- diff.RNA[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(TSS = TSS + 10000) %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
dplyr::select(chr, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols(missed[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
missed2 <- missed[-queryHits(overlap)]
temp <- missed2 %>% dplyr::mutate(TSS = TSS - 10000) %>% dplyr::mutate(start = TSS, end = TSS +1) %>%
dplyr::select(chr, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch3 <- bind_cols(missed2[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
#missed3 <- missed2[-queryHits(overlap)]
diff.RNA <- bind_rows(batch1, batch2, batch3)
# VISUALIZE
diff.RNA$changeType <- factor(diff.RNA$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
"B_weakening", "BtoA", "A_strengthening",
"Unchanged"))
ggplot(diff.RNA, aes(x = diff, fill = changeType)) +
geom_bar(position = "fill") +
labs(title = "Stacked Bar Plot of Change Type by Diff",
x = "Diff",
y = "Count") +
theme_bw() + scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))
name <- "chromo_cons_annoHierarchy"
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_UP_diff0.2.bedpe"))
anchor.up <- (extractAnchor(loop.up))
anchor.up.tb <- as_tibble(anchor.up) %>%
dplyr::mutate(center = (start + end)/2) %>%
dplyr::mutate(start = center-1, end = center+1) %>%
dplyr::select(seqnames, start, end)
anchor.up <- makeGRangesFromDataFrame(anchor.up.tb)
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_NO_diff0.2.bedpe"))
anchor.no <- (extractAnchor(loop.no))
anchor.no.tb <- as_tibble(anchor.no) %>%
dplyr::mutate(center = (start + end)/2) %>%
dplyr::mutate(start = center-1, end = center+1) %>%
dplyr::select(seqnames, start, end)
anchor.no <- makeGRangesFromDataFrame(anchor.no.tb)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_A485vsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- (extractAnchor(loop.down))
anchor.down.tb <- as_tibble(anchor.down) %>%
dplyr::mutate(center = (start + end)/2) %>%
dplyr::mutate(start = center-1, end = center+1) %>%
dplyr::select(seqnames, start, end)
anchor.down <- makeGRangesFromDataFrame(anchor.down.tb)
########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.up, cscore.gr)
## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.up.tb)[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
missed <- data.table(anchor.up.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
data.up <- bind_rows(batch1, batch2)
########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.no, cscore.gr)
## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.no.tb)[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
missed <- data.table(anchor.no.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
data.no <- bind_rows(batch1, batch2)
########################################################################
### Overlap
cscore.gr <- makeGRangesFromDataFrame(cscore[1:3])
overlap <- findOverlaps(anchor.down, cscore.gr)
## Problem of certain genes not getting overlap with cscore due to sparse calling?
batch1 <- bind_cols(data.table(anchor.down.tb)[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
missed <- data.table(anchor.down.tb)[-queryHits(overlap)]
temp <- missed %>% dplyr::mutate(start = start - 10000, end = end - 10000) %>%
dplyr::select(seqnames, start, end)
missed.gr <- makeGRangesFromDataFrame(temp)
overlap <- findOverlaps(missed.gr, cscore.gr)
batch2 <- bind_cols((missed)[queryHits(overlap)],
data.table(cscore)[subjectHits(overlap)])
data.down <- bind_rows(batch1, batch2)
n.up <- nrow(data.up)
n.no <- nrow(data.no)
n.down <- nrow(data.down)
data <- tibble(loopDiff = c(rep("UP", n.up), rep("NO", n.no), rep("DOWN", n.down)),
changeType = c(data.up$changeType, data.no$changeType, data.down$changeType))
# VISUALIZE
data$changeType <- factor(data$changeType, levels = c("A_weakening", "AtoB", "B_strengthening",
"B_weakening", "BtoA", "A_strengthening",
"Unchanged"))
ggplot(data, aes(x = loopDiff, fill = changeType)) +
geom_bar(position = "fill") +
labs(title = "Stacked Bar Plot of Change Type by Diff",
x = "Diff",
y = "Count") +
theme_bw() + scale_fill_manual(values = c("skyblue", "blue", "darkblue", "pink", "red2", "darkred" ,"grey"))
resultDir <- here("../../result")
data <- fread(here(resultDir, "chromHMM", "A485_pe-pe_anchors", "overlap_enrich_100_state.txt"))
colnames(data) <- c("state", "genome", "pe-pe_A485_down", "pe-pe_A485_no", "pe-pe_A485_up")
data <- data %>% dplyr::select(-genome)
data_matrix <- data %>%
column_to_rownames(var = "state") %>%
as.matrix()
library(circlize)
col_fun <- colorRamp2(c(0, 1, 10),
c("blue", "white", "red"))
Heatmap(
data_matrix,
name = "Value",
show_row_names = TRUE,
show_column_names = TRUE,
cluster_columns = FALSE,
cluster_rows = FALSE,
col = col_fun
)
#######
data2 <- data %>% dplyr::select(c(1, 2, 4))
data2 <- data2 %>% dplyr::filter(`pe-pe_A485_down` > `pe-pe_A485_up`)
data_matrix <- data2 %>%
column_to_rownames(var = "state") %>%
as.matrix()
library(circlize)
col_fun <- colorRamp2(c(0, 1, 10),
c("blue", "white", "red"))
Heatmap(
data_matrix,
name = "Value",
show_row_names = TRUE,
show_column_names = TRUE,
cluster_columns = FALSE,
cluster_rows = FALSE,
col = col_fun
)
https://bioconductor.org/books/devel/OHCA/pages/visualization.html
### Getting loop ID
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(res = V3 - V2,
id = paste(V1, res, V2, V5, sep = "_"))
regID <- temp$id
temp <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(res = V3 - V2,
id = paste(V1, res, V2, V5, sep = "_"))
strID <- temp$id
############## REG
## Filtering loops to plot
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- data %>% dplyr::filter(id %in% regID, dTAG > 0.5) %>% dplyr::arrange(desc(dTAG))
temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)
# Visualization
hicDir <- "/Volumes/UKJIN_SSD/data_vault_2024summer_microC/hic"
windowSize <- 1*1e6
i = 1
# for(i in seq(1, 25)){
## Loading hic and plotting
chr <- as_tibble(loops[i])$seqnames1
center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
start <- floor(center - 0.5*windowSize)
end <- floor(center + 0.5*windowSize)
cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))
#cf.G1dTAG <- HicFile(path = here(hicDir, "G1.A485.Merged.hic"))
res <- 10*1000
zmax <- 2.5
hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
fileName <- here(figDir, paste0("visuzliation_regLoop_dTAG_", i, "_10kb_figureVer"))
width <- panelSize(20)*mmToInch
height <- panelSize(10)*mmToInch
png(paste0(fileName, ".png"), res = 600, units = "in", width = width, height = height)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
svglite(paste0(fileName, ".svg"), width = width, height = height)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
res <- 25*1000
zmax <- 3
start <- floor(center - windowSize)
end <- floor(center + windowSize)
hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
fileName <- here(figDir, paste0("visuzliation_regLoop_dTAG_", i, "_25kb"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
# svglite(paste0(fileName, ".svg"), width = 10, height = 5)
# print(cowplot::plot_grid(p1, p2, align = "h"))
# dev.off()
# }
############## Str
## Filtering loops to plot
data <- fread(here(consensusDir, "chromo_cons_score.tsv"))
temp <- data %>% dplyr::filter(id %in% strID, dTAG > 0.5) %>% dplyr::arrange(desc(dTAG))
temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)
# Visualization
hicDir <- "/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/data_vault_2024summer_microC/hic"
windowSize <- 2*1e6
for(i in seq(1, 25)){
## Loading hic and plotting
chr <- as_tibble(loops[i])$seqnames1
center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
start <- floor(center - 0.5*windowSize)
end <- floor(center + 0.5*windowSize)
cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))
res <- 10*1000
zmax <- 2.5
hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
fileName <- here(figDir, paste0("visuzliation_strLoop_dTAG_", i, "_10kb"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
res <- 25*1000
zmax <- 3
start <- floor(center - windowSize)
end <- floor(center + windowSize)
hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
fileName <- here(figDir, paste0("visuzliation_strLoop_dTAG_", i, "_25kb"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
}
############## dTAG called loops
## Filtering loops to plot
data1 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_25kb.tsv"))
data2 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_10kb.tsv"))
data3 <- fread(here(loopDir, "G1.dTAG.Merged_chromosight_5kb.tsv"))
data <- bind_rows(data1, data2, data3)
temp <- data %>% dplyr::arrange(desc(score))
temp <- temp %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
loops <- importBedpe(temp)
# Visualization
hicDir <- "/Volumes/UKJIN_SSD/Genomics_03_Analysis_Working/data_vault_2024summer_microC/hic"
windowSize <- 2*1e6
for(i in seq(1, 25)){
## Loading hic and plotting
chr <- as_tibble(loops[i])$seqnames1
center <- (as_tibble(loops[i])$start1 + as_tibble(loops[i])$end2)/2
start <- floor(center - 0.5*windowSize)
end <- floor(center + 0.5*windowSize)
cf.G1DMSO <- HicFile(path = here(hicDir, "G1.DMSO.Merged.hic"))
cf.G1dTAG <- HicFile(path = here(hicDir, "G1.dTAG.Merged.hic"))
res <- 10*1000
zmax <- 2.5
hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
fileName <- here(figDir, paste0("visuzliation_dTAGcalledLoop_dTAG_", i, "_10kb"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
res <- 25*1000
zmax <- 3
start <- floor(center - windowSize)
end <- floor(center + windowSize)
hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax), loop = loops)
p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax), loop = loops)
fileName <- here(figDir, paste0("visuzliation_dTAGcalledLoop_dTAG_", i, "_25kb"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 10, height = 5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
}
chr = "chr13"
start = 96900000
end = 98100000
res <- 10*1000
zmax <- 2.5
hic1 <- import(cf.G1DMSO, focus = paste0(chr, ":", start, "-", end), resolution = res)
hic2 <- import(cf.G1dTAG, focus = paste0(chr, ":", start, "-", end), resolution = res)
p1 <- plotMatrix(hic1, dpi = 1000, limits = c(0, zmax))
p2 <- plotMatrix(hic2, dpi = 1000, limits = c(0, zmax))
print(cowplot::plot_grid(p1, p2, align = "h"))
Strategy: check overlap of the union loops to the loops called at each condition #### Check sample specific called loops
library(circlize)
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
temp <- data %>% dplyr::select(seq(1, 6))
colnames(temp) <- c("V1", "V2", "V3", "V4", "V5", "V6")
cons.loop <- importBedpe(temp)
# Checking DMSO
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.DMSO.Merged_chromosight_", 5, "kb.bedpe"))))
overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)
index <- sort(unique(c(queryHits(overlap.25kb),
queryHits(overlap.10kb),
queryHits(overlap.5kb))))
data$calledByDMSO <- 0
data$calledByDMSO[index] <- 1
# Checking dTAG
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.dTAG.Merged_chromosight_", 5, "kb.bedpe"))))
overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)
index <- sort(unique(c(queryHits(overlap.25kb),
queryHits(overlap.10kb),
queryHits(overlap.5kb))))
data$calledBydTAG <- 0
data$calledBydTAG[index] <- 1
# Checking A485
loop.25kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 25, "kb.bedpe"))))
loop.10kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 10, "kb.bedpe"))))
loop.5kb <- importBedpe(fread(here(loopDir, paste0("G1.A485.Merged_chromosight_", 5, "kb.bedpe"))))
overlap.25kb <- findOverlaps(cons.loop, loop.25kb)
overlap.10kb <- findOverlaps(cons.loop, loop.10kb)
overlap.5kb <- findOverlaps(cons.loop, loop.5kb)
index <- sort(unique(c(queryHits(overlap.25kb),
queryHits(overlap.10kb),
queryHits(overlap.5kb))))
data$calledByA485 <- 0
data$calledByA485[index] <- 1
# Heatmap (checking whether certain peak is called by specific sample)
dataToPlot <- data %>% dplyr::select(id, calledByDMSO, calledBydTAG, calledByA485)
dataToPlot <- dataToPlot %>% dplyr::mutate(flag = 4*calledByDMSO + 2*calledBydTAG + calledByA485) %>%
dplyr::arrange(desc(flag))
row_groups <- dataToPlot$flag
data_matrix <- dataToPlot %>% column_to_rownames(var = "id") %>% dplyr::select(-flag) %>% as.matrix()
col_fun <- colorRamp2(c(0, 1),
c("white", "green"))
h1 <- Heatmap(data_matrix, name = "calledBy",
cluster_columns = FALSE,
cluster_rows = FALSE,
show_row_dend = FALSE,
border = TRUE,
show_row_names = FALSE,
col = col_fun,
row_split = row_groups)
# Prepare data for the second heatmap
dataToPlot2 <- data %>% dplyr::select(id, DMSO, dTAG, A485) %>%
arrange(match(id, dataToPlot$id))
data_matrix2 <- dataToPlot2 %>% column_to_rownames(var = "id") %>% as.matrix()
col_fun2 <- colorRamp2(c(-0.5, 0, 1), c("blue", "white", "red"))
h2 <- Heatmap(data_matrix2, name = "score",
cluster_columns = FALSE,
cluster_rows = TRUE,
show_row_dend = FALSE,
border = TRUE,
show_row_names = FALSE,
col = col_fun2,
row_split = row_groups)
fileName <- here(figDir, paste0("heatmap_checkingSampleCalledLoops"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 3, height = 10)
print(h1 + h2)
dev.off()
# Heatmap
library(circlize)
# Heatmap 2
set.seed(123)
diffCutoff <- 0.2
dataToPlot <- data %>% dplyr::mutate(isdiff_dTAG_DMSO =
case_when(diff_dTAG_DMSO >= diffCutoff ~ 1,
abs(diff_dTAG_DMSO) < diffCutoff ~ 0,
diff_dTAG_DMSO <= -diffCutoff ~ -1,
TRUE ~ NA),
isdiff_A485_DMSO =
case_when(diff_A485_DMSO >= diffCutoff ~ 1,
abs(diff_A485_DMSO) < diffCutoff ~ 0,
diff_A485_DMSO <= -diffCutoff ~ -1,
TRUE ~ NA)) %>%
dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
cluster_columns = FALSE,
cluster_rows = FALSE,
row_km = 9,
show_row_dend = FALSE,
border = FALSE,
show_row_names = FALSE,
col = col_fun)
fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
set.seed(123)
hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)
loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
loop.cluster5 <- temp[row_clusters[[5]]]
loop.cluster6 <- temp[row_clusters[[6]]]
loop.cluster7 <- temp[row_clusters[[7]]]
loop.cluster8 <- temp[row_clusters[[8]]]
loop.cluster9 <- temp[row_clusters[[9]]]
# Heatmap
library(circlize)
# Heatmap 2
set.seed(123)
diffCutoff <- 0.2
dataToPlot <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
dplyr::mutate(isdiff_dTAG_DMSO =
case_when(diff_dTAG_DMSO >= diffCutoff ~ 1,
abs(diff_dTAG_DMSO) < diffCutoff ~ 0,
diff_dTAG_DMSO <= -diffCutoff ~ -1,
TRUE ~ NA),
isdiff_A485_DMSO =
case_when(diff_A485_DMSO >= diffCutoff ~ 1,
abs(diff_A485_DMSO) < diffCutoff ~ 0,
diff_A485_DMSO <= -diffCutoff ~ -1,
TRUE ~ NA)) %>%
dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
cluster_columns = FALSE,
cluster_rows = FALSE,
row_km = 8,
show_row_dend = FALSE,
border = FALSE,
show_row_names = FALSE,
col = col_fun)
fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_reg"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
set.seed(123)
hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)
loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
loop.cluster5 <- temp[row_clusters[[5]]]
loop.cluster6 <- temp[row_clusters[[6]]]
loop.cluster7 <- temp[row_clusters[[7]]]
loop.cluster8 <- temp[row_clusters[[8]]]
library(circlize)
set.seed(123)
dataToPlot <- data %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
dplyr::select(id, diff_dTAG_DMSO, diff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "white", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
cluster_columns = FALSE,
cluster_rows = FALSE,
row_km = 4,
show_row_dend = FALSE,
border = TRUE,
show_row_names = FALSE,
col = col_fun)
fileName <- here(figDir, paste0("heatmap_diffScore_reg_k4"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 6)
print(h1)
dev.off()
hm_drawn <- draw(h1)
row_clusters <- row_order(hm_drawn)
loop.cluster1 <- temp[row_clusters[[1]]]
loop.cluster2 <- temp[row_clusters[[2]]]
loop.cluster3 <- temp[row_clusters[[3]]]
loop.cluster4 <- temp[row_clusters[[4]]]
# Heatmap
library(circlize)
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Heatmap 2
set.seed(123)
# Import obs/exp scores and merge to the dataset
minValue <- -4
diffCutoff <- 0.5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
dataToPlot <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO,
isdiff_dTAG_DMSO = case_when(log_obsexp_diff_dTAG_DMSO >= diffCutoff ~ 1,
abs(log_obsexp_diff_dTAG_DMSO) < diffCutoff ~ 0,
log_obsexp_diff_dTAG_DMSO <= diffCutoff ~ -1,
TRUE ~ NA ),
isdiff_A485_DMSO = case_when(log_obsexp_diff_A485_DMSO >= diffCutoff ~ 1,
abs(log_obsexp_diff_A485_DMSO) < diffCutoff ~ 0,
log_obsexp_diff_A485_DMSO <= diffCutoff ~ -1,
TRUE ~ NA))%>%
dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
cluster_columns = FALSE,
cluster_rows = FALSE,
row_km = 9,
show_row_dend = FALSE,
border = FALSE,
show_row_names = FALSE,
col = col_fun)
fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_OE"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
#
# set.seed(123)
#
# hm_drawn <- draw(h1)
# row_clusters <- row_order(hm_drawn)
#
# loop.cluster1 <- temp[row_clusters[[1]]]
# loop.cluster2 <- temp[row_clusters[[2]]]
# loop.cluster3 <- temp[row_clusters[[3]]]
# loop.cluster4 <- temp[row_clusters[[4]]]
# loop.cluster5 <- temp[row_clusters[[5]]]
# loop.cluster6 <- temp[row_clusters[[6]]]
# loop.cluster7 <- temp[row_clusters[[7]]]
# loop.cluster8 <- temp[row_clusters[[8]]]
# loop.cluster9 <- temp[row_clusters[[9]]]
# Heatmap
library(circlize)
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Heatmap 2
set.seed(123)
# Import obs/exp scores and merge to the dataset
minValue <- -4
diffCutoff <- 0.5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
dataToPlot <- data %>% dplyr::left_join(obsexp, by = c("id")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E")) %>%
dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO,
isdiff_dTAG_DMSO = case_when(log_obsexp_diff_dTAG_DMSO >= diffCutoff ~ 1,
abs(log_obsexp_diff_dTAG_DMSO) < diffCutoff ~ 0,
log_obsexp_diff_dTAG_DMSO <= diffCutoff ~ -1,
TRUE ~ NA ),
isdiff_A485_DMSO = case_when(log_obsexp_diff_A485_DMSO >= diffCutoff ~ 1,
abs(log_obsexp_diff_A485_DMSO) < diffCutoff ~ 0,
log_obsexp_diff_A485_DMSO <= diffCutoff ~ -1,
TRUE ~ NA))%>%
dplyr::select(id, isdiff_dTAG_DMSO, isdiff_A485_DMSO)
data_matrix <- dataToPlot %>% column_to_rownames(var = "id")%>% as.matrix()
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "grey", "red"))
h1 <- Heatmap(data_matrix, name = "loop score",
cluster_columns = FALSE,
cluster_rows = FALSE,
row_km = 9,
show_row_dend = FALSE,
border = FALSE,
show_row_names = FALSE,
col = col_fun)
fileName <- here(figDir, paste0("heatmap_checkingDeltaAcrossCondition_reg_OE"))
png(paste0(fileName, ".png"), res = 600, units = "in", width = 2, height = 10)
print(h1)
dev.off()
#
# set.seed(123)
#
# hm_drawn <- draw(h1)
# row_clusters <- row_order(hm_drawn)
#
# loop.cluster1 <- temp[row_clusters[[1]]]
# loop.cluster2 <- temp[row_clusters[[2]]]
# loop.cluster3 <- temp[row_clusters[[3]]]
# loop.cluster4 <- temp[row_clusters[[4]]]
# loop.cluster5 <- temp[row_clusters[[5]]]
# loop.cluster6 <- temp[row_clusters[[6]]]
# loop.cluster7 <- temp[row_clusters[[7]]]
# loop.cluster8 <- temp[row_clusters[[8]]]
# loop.cluster9 <- temp[row_clusters[[9]]]
Where does differentiated related genes fall into?
marginError <- function(myList) {
sample.n = length(myList)
sample.sd = sd(myList)
sample.se = sample.sd/sqrt(sample.n)
alpha = 0.05
degrees.freedom = sample.n - 1
t.score = qt(p = alpha/2, df = degrees.freedom, lower.tail = F)
margin.error = t.score*sample.se
return(margin.error)
}
PROseq.bobbie <- as_tibble(fread(here(refDir, "bobbie_gene_classification.csv")))
geneList.Epi.dTAG.up <- (fread(here(refDir, "diff_G1.dTAG_G1.Epi.dTAG_vs_G1.Epi.DMSO.tsv")) %>% dplyr::filter(padj < 0.05, log2FoldChange > 0))$ensembl_gene_id
geneList.Epi.dTAG.down <- (fread(here(refDir, "diff_G1.dTAG_G1.Epi.dTAG_vs_G1.Epi.DMSO.tsv")) %>% dplyr::filter(padj < 0.05, log2FoldChange < 0))$ensembl_gene_id
## Converting transcript ID to gene ID
idPair_tg <- getBM(attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
filters = "ensembl_transcript_id",
values = PROseq.bobbie$enst,
mart = ensembl.v102)
PROseq.bobbie <- PROseq.bobbie %>% dplyr::left_join(idPair_tg, by = c("enst" = "ensembl_transcript_id"))
## Bar Plot
temp <- PROseq.bobbie %>% dplyr::filter(ensembl_gene_id %in% geneList.Epi.dTAG.down)
group <- c(rep("EpiLC_dTAGvsDMSO_DOWN", 4))
cluster <- rep(c("Early", "Middle", "Late", "Transient"), 1)
cluster <- factor(cluster, levels = c("Early", "Middle", "Late", "Transient"))
value <- c(sum(temp$Cluster == "Early"),
sum(temp$Cluster == "Middle"),
sum(temp$Cluster == "Late"),
sum(temp$Cluster == "Transient"))
data <- data.frame(group, cluster, value)
ggplot(data, aes(fill=cluster, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
## Line plot
temp <- temp %>% dplyr::select(c(4, 5, 6, 7, 8, 9))
temp.tall = temp %>% pivot_longer(-c(1, 2), names_to = "timepoints", values_to = "value")
gg1 = ggplot(temp.tall,
aes( x = factor(timepoints, level = c("MIT", "EG1", "LG1", "ASYN")),
y = value,
group = enst, col = enst)) +
geom_line() +
theme_classic() +
ggtitle("Bobbie, PRO-seq", subtitle = "selected genes") +
xlab ("timepoints") + ylab("value") +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggsave(filename = here(figDir, "bobbie_gene_EpiLC_dTAGvsDMSO_DOWN.png"), gg1, width = 4, height = 4, dpi = 300, units = "in", device = "png")
# DRAW RIBBON
temp.ribbon = tibble("timepoints" = c("MIT", "EG1", "LG1", "ASYN"),
"value" = c(mean(temp$MIT), mean(temp$EG1), mean(temp$LG1), mean(temp$ASYN)),
"lower" = c(mean(temp$MIT) - marginError(temp$MIT),
mean(temp$EG1) - marginError(temp$EG1),
mean(temp$LG1) - marginError(temp$LG1),
mean(temp$ASYN) - marginError(temp$ASYN)),
"upper" = c(mean(temp$MIT) + marginError(temp$MIT),
mean(temp$EG1) + marginError(temp$EG1),
mean(temp$LG1) + marginError(temp$LG1),
mean(temp$ASYN) + marginError(temp$ASYN)))
gg1 = ggplot(temp.ribbon, aes(x = factor(timepoints, level = c("MIT", "EG1", "LG1", "ASYN")),
y = value, group = 1)) +
geom_line(color = "black") +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.3) +
theme_classic() +
ggtitle("Bobbie, PRO-seq", subtitle = "selected genes") +
xlab ("timepoints") + ylab("value") + ylim(0, 65) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggsave(filename = here(figDir, "bobbie_gene_EpiLC_dTAGvsDMSO_DOWN_95CI.png"), gg1, width = 4, height = 4, dpi = 300, units = "in", device = "png")
###[2.33] Finding closest enhancer Here, enhancer will be defined by H3K27ac peak It makes more sence to include the enhancer peak on gene body (strategy 1) exclude +-10kb region from TSS only (strategy 2) only check the E-P loops called in Micro-C ##### binary group - excluding all gene body
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
dplyr::select(ensembl, chr, TSS)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
gene.tb <- gene.tb %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
seqnames = gene.tb$chr,
ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
ensembl = gene.tb$ensembl
)
genebody.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>% dplyr::select(V1, V2, V3)
colnames(genebody.tb) <- c("chr", "start", "end")
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K27ac<- makeGRangesFromDataFrame(fread(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed")) %>%
dplyr::mutate(chr = V1, start = (V2 + V3)/2, end = (V2 + V3)/2) %>%
dplyr::select(chr, start, end))
####### Filtering out peaks overlapping with the gene body
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)
# Indices of peaks that overlap the TSS
overlapping_peak_indices <- unique(subjectHits(overlaps))
# Exclude overlapping peaks
non_overlapping_peaks <- peak.H3K27ac[-overlapping_peak_indices]
####### Calculating distance to nearest peak
nearest_peak_indices <- nearest(genes.gr, non_overlapping_peaks)
nearest_peaks <- non_overlapping_peaks[nearest_peak_indices]
distances <- distance(genes.gr, nearest_peaks)
results <- data.frame(
ensembl = mcols(genes.gr)$ensembl,
gene_chr = as.character(seqnames(genes.gr)),
gene_TSS = start(genes.gr),
peak_chr = as.character(seqnames(nearest_peaks)),
peak_start = start(nearest_peaks),
peak_end = end(nearest_peaks),
distance = distances
)
results <- results %>% dplyr::mutate(group = case_when(ensembl %in% group1 ~ "group1",
ensembl %in% group2 ~ "group2",
TRUE ~ NA))
ggplot(results, aes(x = group, y = distance, fill = group)) + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw() + ggtitle("distance to nearest H3K27ac peak excluding gene body") + scale_y_continuous(labels = label_kb_mb) + coord_cartesian(ylim = c(0, 250*1000))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
dplyr::select(ensembl, chr, TSS) %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
seqnames = gene.tb$chr,
ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
ensembl = gene.tb$ensembl
)
genebody.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>% dplyr::filter(V6 %in% c(group1, group2))%>% dplyr::select(V1, V2, V3)
colnames(genebody.tb) <- c("chr", "start", "end")
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)
# Step 1: Find overlaps between gene bodies and peaks
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)
# Create a list mapping each gene to the indices of peaks overlapping with its gene body
overlapping_peaks_per_gene <- split(subjectHits(overlaps), queryHits(overlaps))
# Initialize an empty list to store results
results_list <- vector("list", length(genes.gr))
# Step 2: For each gene, exclude overlapping peaks and find the nearest peak to its TSS
for (i in seq_along(genes.gr)) {
gene <- genes.gr[i]
# Get indices of peaks overlapping with this gene's body
overlapping_peak_indices <- overlapping_peaks_per_gene[[as.character(i)]]
# Exclude overlapping peaks for this gene
if (!is.null(overlapping_peak_indices)) {
peaks_to_consider <- peak.H3K27ac[-overlapping_peak_indices]
} else {
peaks_to_consider <- peak.H3K27ac
}
# Find the nearest peak to the TSS of this gene
nearest_peak_index <- nearest(gene, peaks_to_consider)
if (is.na(nearest_peak_index)) {
# No peaks found; set NA values
results_list[[i]] <- data.frame(
ensembl = mcols(gene)$ensembl,
gene_chr = as.character(seqnames(gene)),
gene_TSS = start(gene),
peak_chr = NA,
peak_start = NA,
peak_end = NA,
distance = NA
)
} else {
nearest_peak <- peaks_to_consider[nearest_peak_index]
dist <- distance(gene, nearest_peak)
results_list[[i]] <- data.frame(
ensembl = mcols(gene)$ensembl,
gene_chr = as.character(seqnames(gene)),
gene_TSS = start(gene),
peak_chr = as.character(seqnames(nearest_peak)),
peak_start = start(nearest_peak),
peak_end = end(nearest_peak),
distance = dist
)
}
}
# Combine results into a single data frame
results <- do.call(rbind, results_list)
# Add group information
results <- results %>% dplyr::mutate(group = case_when(
ensembl %in% group1 ~ "group1",
ensembl %in% group2 ~ "group2",
TRUE ~ NA_character_
))
# Plotting
ggplot(results, aes(x = group, y = distance, fill = group)) +
geom_violin() +
geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
theme_bw() +
ggtitle("Distance to Nearest H3K27ac Peak Excluding Gene Body Overlaps") +
scale_y_continuous(labels = label_kb_mb) +
coord_cartesian(ylim = c(0, 50 * 1000))
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
dplyr::select(ensembl, chr, TSS)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
gene.tb <- gene.tb %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
seqnames = gene.tb$chr,
ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
ensembl = gene.tb$ensembl
)
genebody.tb <- gene.tb %>% dplyr::mutate(start = TSS-10*1000, end = TSS + 10*1000) %>%
dplyr::select(chr, start, end)
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
# peak.H3K27ac<- makeGRangesFromDataFrame(fread(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed")) %>%
# dplyr::mutate(chr = V1, start = (V2 + V3)/2, end = (V2 + V3)/2) %>%
# dplyr::select(chr, start, end))
####### Filtering out peaks overlapping with the gene body
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)
# Indices of peaks that overlap the TSS
overlapping_peak_indices <- unique(subjectHits(overlaps))
# Exclude overlapping peaks
non_overlapping_peaks <- peak.H3K27ac[-overlapping_peak_indices]
####### Calculating distance to nearest peak
nearest_peak_indices <- nearest(genes.gr, non_overlapping_peaks)
nearest_peaks <- non_overlapping_peaks[nearest_peak_indices]
distances <- distance(genes.gr, nearest_peaks)
results <- data.frame(
ensembl = mcols(genes.gr)$ensembl,
gene_chr = as.character(seqnames(genes.gr)),
gene_TSS = start(genes.gr),
peak_chr = as.character(seqnames(nearest_peaks)),
peak_start = start(nearest_peaks),
peak_end = end(nearest_peaks),
distance = distances
)
results <- results %>% dplyr::mutate(group = case_when(ensembl %in% group1 ~ "group1",
ensembl %in% group2 ~ "group2",
TRUE ~ NA))
ggplot(results, aes(x = group, y = distance, fill = group)) + geom_violin() + geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw() + ggtitle("distance to nearest H3K27ac peak excluding gene body") + scale_y_continuous(labels = label_kb_mb) + coord_cartesian(ylim = c(0, 250*1000))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
dplyr::select(ensembl, chr, TSS) %>% dplyr::filter(ensembl %in% c(group1, group2))
genes.gr <- GRanges(
seqnames = gene.tb$chr,
ranges = IRanges(start = gene.tb$TSS, end = gene.tb$TSS),
ensembl = gene.tb$ensembl
)
genebody.tb <- gene.tb %>% dplyr::mutate(start = TSS-10*1000, end = TSS + 10*1000) %>%
dplyr::select(chr, start, end)
genebody.gr <- makeGRangesFromDataFrame(genebody.tb)
peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
# Step 1: Find overlaps between gene bodies and peaks
overlaps <- findOverlaps(genebody.gr, peak.H3K27ac)
# Create a list mapping each gene to the indices of peaks overlapping with its gene body
overlapping_peaks_per_gene <- split(subjectHits(overlaps), queryHits(overlaps))
# Initialize an empty list to store results
results_list <- vector("list", length(genes.gr))
# Step 2: For each gene, exclude overlapping peaks and find the nearest peak to its TSS
for (i in seq_along(genes.gr)) {
gene <- genes.gr[i]
# Get indices of peaks overlapping with this gene's body
overlapping_peak_indices <- overlapping_peaks_per_gene[[as.character(i)]]
# Exclude overlapping peaks for this gene
if (!is.null(overlapping_peak_indices)) {
peaks_to_consider <- peak.H3K27ac[-overlapping_peak_indices]
} else {
peaks_to_consider <- peak.H3K27ac
}
# Find the nearest peak to the TSS of this gene
nearest_peak_index <- nearest(gene, peaks_to_consider)
if (is.na(nearest_peak_index)) {
# No peaks found; set NA values
results_list[[i]] <- data.frame(
ensembl = mcols(gene)$ensembl,
gene_chr = as.character(seqnames(gene)),
gene_TSS = start(gene),
peak_chr = NA,
peak_start = NA,
peak_end = NA,
distance = NA
)
} else {
nearest_peak <- peaks_to_consider[nearest_peak_index]
dist <- distance(gene, nearest_peak)
results_list[[i]] <- data.frame(
ensembl = mcols(gene)$ensembl,
gene_chr = as.character(seqnames(gene)),
gene_TSS = start(gene),
peak_chr = as.character(seqnames(nearest_peak)),
peak_start = start(nearest_peak),
peak_end = end(nearest_peak),
distance = dist
)
}
}
# Combine results into a single data frame
results <- do.call(rbind, results_list)
# Add group information
results <- results %>% dplyr::mutate(group = case_when(
ensembl %in% group1 ~ "group1",
ensembl %in% group2 ~ "group2",
TRUE ~ NA_character_
))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group == group1) )$distance
distance2 <- (data %>% dplyr::filter(group == group2) )$distance
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
getPvalWilcox(results, "group1", "group2")
# Plotting
ggplot(results, aes(x = group, y = distance, fill = group)) +
geom_violin() +
geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
theme_bw() +
ggtitle("Distance to Nearest H3K27ac Peak Excluding Gene Body Overlaps") +
scale_y_continuous(labels = label_kb_mb) +
coord_cartesian(ylim = c(0, 50 * 1000)) +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
diffCutoff = 0.2) %>%
dplyr::filter(Anno2 == "P-E")
temp <- geneAnnoData %>% unnest(gene)
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3), ensembl = V6, chr = V1) %>%
dplyr::select(ensembl, chr, TSS)
temp <- dplyr::left_join(temp, gene.tb, by = c("gene" = "ensembl"))
temp <- temp %>% dplyr::mutate(center1 = (start1 + end1)/2,
center2 = (start2 + end2)/2,
distance1 = abs(TSS-center1),
distance2 = abs(TSS-center2)) %>%
dplyr::mutate(distance = pmax(distance1, distance2))
temp <- temp %>% group_by(gene) %>% summarize(min_enh_distance = min(distance)) %>%
dplyr::mutate(group = case_when(gene %in% group1 ~ "group1",
gene %in% group2 ~ "group2",
TRUE ~ NA)) %>%
dplyr::filter(!is.na(group))
ggplot(temp, aes(x = group, y = min_enh_distance, fill = group)) +
geom_violin() +
geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") +
theme_bw() +
ggtitle("Distance to Nearest Enhancer fro E-P") +
scale_y_continuous(labels = label_kb_mb) +
coord_cartesian(ylim = c(0, 1000 * 1000))
Idea: having one non-perturbed reg loops could be enough.
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(size = V3 - V2,
id = paste(V1, size, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(size = V3 - V2,
id = paste(V1, size, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(size = V3 - V2,
id = paste(V1, size, V2, V5, sep = "_"))
geneAnnoData <- geneAnnoData %>% dplyr::filter(id %in% c(loop.up$id, loop.no$id, loop.down$id)) %>%
dplyr::mutate(loopType = case_when(id %in% c(loop.up$id, loop.no$id) ~ "insensitive",
id %in% c(loop.down$id) ~ "sensitive",
TRUE ~ NA))
data <- geneAnnoData %>% dplyr::select(gene, id, loopType) %>% unnest(gene) %>%
group_by(gene) %>% summarize(
insensitive = sum(loopType == "insensitive", na.rm = TRUE),
sensitive = sum(loopType == "sensitive", na.rm = TRUE))
###
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
geneGroup <- tibble(group = c(rep("group1", length(group1)), rep("group2", length(group2))),
gene = c(group1, group2))
data <- left_join(geneGroup, data, by = c("gene")) %>% dplyr::mutate(mutate(across(everything(), ~replace_na(., 0))))
###
data <- data %>% dplyr::mutate(presenceOfRetained = if_else(insensitive > 0, "YES", "NO"),
percOfRetained = if_else(insensitive + sensitive == 0, 0, 100*insensitive/(insensitive + sensitive)))
###
ggplot(data, aes(x = group, y = percOfRetained, fill = group)) + geom_violin() +
geom_boxplot(outlier.shape = NA, width = 0.1, fill = "white") + theme_bw()
### Stacked barplot
group <- c("group1", "group1", "group2", "group2")
presenceOfRetained <- rep(c("YES", "NO"), 2)
presenceOfRetained <- factor (presenceOfRetained, levels = c("YES", "NO"))
value <- c(nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "YES")),
nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "NO")),
nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "YES")),
nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "NO")))
plotData <- data.frame(group, presenceOfRetained, value)
ggplot(plotData, aes(fill=presenceOfRetained, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-pe_ensemblList.tsv")),
diffCutoff = 0.2)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(size = V3 - V2,
id = paste(V1, size, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(size = V3 - V2,
id = paste(V1, size, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(size = V3 - V2,
id = paste(V1, size, V2, V5, sep = "_"))
geneAnnoData <- geneAnnoData %>% dplyr::filter(id %in% c(loop.up$id, loop.no$id, loop.down$id)) %>%
dplyr::mutate(loopType = case_when(id %in% c(loop.up$id, loop.no$id) ~ "insensitive",
id %in% c(loop.down$id) ~ "sensitive",
TRUE ~ NA))
data <- geneAnnoData %>% dplyr::select(gene, id, loopType) %>% unnest(gene) %>%
group_by(gene) %>% summarize(
insensitive = sum(loopType == "insensitive", na.rm = TRUE),
sensitive = sum(loopType == "sensitive", na.rm = TRUE))
###
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
geneGroup <- tibble(group = c(rep("group1", length(group1)), rep("group2", length(group2))),
gene = c(group1, group2))
data <- left_join(geneGroup, data, by = c("gene")) %>% dplyr::mutate(mutate(across(everything(), ~replace_na(., 0))))
###
data <- data %>% dplyr::mutate(presenceOfRetained = if_else(insensitive > 0, "YES", "NO"),
percOfRetained = if_else(insensitive + sensitive == 0, 0, 100*insensitive/(insensitive + sensitive)))
###
ggplot(data, aes(x = group, y = percOfRetained, fill = group)) + geom_violin() +
geom_boxplot(outlier.shape = NA, width = 0.05, fill = "white") + theme_bw()
### Stacked barplot
group <- c("group1", "group1", "group2", "group2")
presenceOfRetained <- rep(c("YES", "NO"), 2)
presenceOfRetained <- factor (presenceOfRetained, levels = c("YES", "NO"))
value <- c(nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "YES")),
nrow(data %>% dplyr::filter(group == "group1", presenceOfRetained == "NO")),
nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "YES")),
nrow(data %>% dplyr::filter(group == "group2", presenceOfRetained == "NO")))
plotData <- data.frame(group, presenceOfRetained, value)
ggplot(plotData, aes(fill=presenceOfRetained, y=value, x=group)) +
geom_bar(position="fill", stat="identity") + theme_classic()
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))
sample <- "G1.DMSO.Merged"
for(sample in c("G1.DMSO.Merged", "G1.dTAG.Merged", "G1.A485.Merged")){
temp.25kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_25kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
colnames(temp.25kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
temp.10kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_10kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
colnames(temp.10kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
temp.5kb <-fread(here(consensusDir, paste0("loopScore_", sample, "_5kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
colnames(temp.5kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
temp <- bind_rows(temp.25kb, temp.10kb, temp.5kb)
fwrite(temp, here(consensusDir, paste0("loopScore_", sample, ".tsv")), sep = "\t")
}
temp.DMSO <- fread(here(consensusDir, "loopScore_G1.DMSO.Merged.tsv")) %>%
dplyr::select(id, obs, obsexp)
colnames(temp.DMSO) <- c("id", "obs_DMSO", "obsexp_DMSO")
temp.dTAG <- fread(here(consensusDir, "loopScore_G1.dTAG.Merged.tsv")) %>%
dplyr::select(id, obs, obsexp)
colnames(temp.dTAG) <- c("id", "obs_dTAG", "obsexp_dTAG")
temp.A485 <- fread(here(consensusDir, "loopScore_G1.A485.Merged.tsv")) %>%
dplyr::select(id, obs, obsexp)
colnames(temp.A485) <- c("id", "obs_A485", "obsexp_A485")
loops_oe <- full_join(full_join(temp.DMSO, temp.dTAG, by = "id"),
temp.A485, by = "id") %>%
dplyr::mutate(oeFC_dTAG_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_dTAG/obsexp_DMSO),
oeFC_A485_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_A485/obsexp_DMSO))
fwrite(loops_oe, here(consensusDir, paste0("loopScore_cons_obsexp.tsv")), sep = "\t")
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))
for(sample in c("Async.UT", "Async.AID")){
temp.25kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_25kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
colnames(temp.25kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
temp.10kb <- fread(here(consensusDir, paste0("loopScore_", sample, "_10kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
colnames(temp.10kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
temp.5kb <-fread(here(consensusDir, paste0("loopScore_", sample, "_5kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(chrom1, start1, end1, chrom2, start2, end2, id, observed, "O/E")
colnames(temp.5kb) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "id", "obs", "obsexp")
temp <- bind_rows(temp.25kb, temp.10kb, temp.5kb)
fwrite(temp, here(consensusDir, paste0("loopScore_", sample, ".tsv")), sep = "\t")
}
temp.UT <- fread(here(consensusDir, "loopScore_Async.UT.tsv")) %>%
dplyr::select(id, obs, obsexp)
colnames(temp.UT) <- c("id", "obs_UT", "obsexp_UT")
temp.AID <- fread(here(consensusDir, "loopScore_Async.AID.tsv")) %>%
dplyr::select(id, obs, obsexp)
colnames(temp.AID) <- c("id", "obs_AID", "obsexp_AID")
loops_oe <- full_join(temp.UT, temp.AID, by = "id") %>%
dplyr::mutate(oeFC_AID_UT = if_else(obsexp_UT == 0, NA, obsexp_AID/obsexp_UT))
fwrite(loops_oe, here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv")), sep = "\t")
Only the loops that made to consensus loops are considered ##### G1
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))
res <- 25
for (res in c(25, 10, 5)){
temp.DMSO <- fread(here(consensusDir, paste0("loopScore_G1.DMSO.Merged_", res, "kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(id, observed, "O/E")
colnames(temp.DMSO) <- c("id", "obs_DMSO", "obsexp_DMSO")
temp.dTAG <- fread(here(consensusDir, paste0("loopScore_G1.dTAG.Merged_", res, "kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(id, observed, "O/E")
colnames(temp.dTAG) <- c("id", "obs_dTAG", "obsexp_dTAG")
temp.A485 <- fread(here(consensusDir, paste0("loopScore_G1.A485.Merged_", res, "kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(id, observed, "O/E")
colnames(temp.A485) <- c("id", "obs_A485", "obsexp_A485")
temp <- full_join(full_join(temp.DMSO, temp.dTAG, by = c("id")), temp.A485, by = c("id")) %>%
dplyr::mutate(oeFC_dTAG_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_dTAG/obsexp_DMSO),
oeFC_A485_DMSO = if_else(obsexp_DMSO == 0, NA, obsexp_A485/obsexp_DMSO))
fwrite(temp, here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv")), sep = "\t")
}
loops <- fread(here(consensusDir, "chromo_cons_score.tsv"))
res <- 25
for (res in c(25, 10, 5)){
temp.UT <- fread(here(consensusDir, paste0("loopScore_Async.UT_", res, "kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(id, observed, "O/E")
colnames(temp.UT) <- c("id", "obs_UT", "obsexp_UT")
temp.AID <- fread(here(consensusDir, paste0("loopScore_Async.AID_", res, "kb.tsv"))) %>%
dplyr::mutate(binSize = end1 - start1,
id = paste(chrom1, binSize, start1, start2, sep = "_")) %>%
dplyr::filter(id %in% loops$id) %>%
dplyr::select(id, observed, "O/E")
colnames(temp.AID) <- c("id", "obs_AID", "obsexp_AID")
temp <- full_join(temp.UT, temp.AID, by = c("id")) %>%
dplyr::mutate(oeFC_AID_UT = if_else(obsexp_UT == 0, NA, obsexp_AID/obsexp_UT))
fwrite(temp, here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb_async.tsv")), sep = "\t")
}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
for(res in c(25, 10, 5)){
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO_", res, "kb")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
specificLoop.25kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_25kb.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
specificLoop.10kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_10kb.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
specificLoop.5kb <- fread(here(loopDir, "G1.DMSO.Merged_chromosight_5kb.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
specificLoops.DMSO <- c(specificLoop.25kb$id, specificLoop.10kb$id, specificLoop.5kb$id)
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id,
id %in% specificLoops.DMSO)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id,
id %in% specificLoops.DMSO)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO_str_vs_reg_DMSOspecificLoops")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import obs/exp scores and merge to the dataset
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>% dplyr::mutate(distance = start2 - start1)
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
# distance filter
obsexp <- data %>% dplyr::filter(distance > 1e6)
# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO_1mbover")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO_1mbover_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
minValue <- -4
maxValue <- 10
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
# dTAG
obsexp$density <- get_density(obsexp$log_obs_DMSO, obsexp$log_obs_dTAG, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_dTAG, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_dTAG, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_dTAG_vs_DMSO_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
minValue <- -8
maxValue <- 7
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv"))) %>%
dplyr::mutate(log_obsexp_UT = if_else(obsexp_UT == 0, minValue, log2(obsexp_UT)),
log_obsexp_AID = if_else(obsexp_AID == 0, minValue, log2(obsexp_AID)))
# dTAG
obsexp$density <- get_density(obsexp$log_obsexp_UT, obsexp$log_obsexp_AID, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_AID_vs_UT")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_UT, temp$log_obsexp_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_UT, y = log_obsexp_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_AID_vs_UT_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
minValue <- -20
maxValue <- 10
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_Async.tsv"))) %>%
dplyr::mutate(log_obsexp_UT = if_else(obsexp_UT == 0, minValue, log2(obsexp_UT)),
log_obsexp_AID = if_else(obsexp_AID == 0, minValue, log2(obsexp_AID)),
log_obs_UT = if_else(obs_UT == 0, minValue, log2(obs_UT)),
log_obs_AID = if_else(obs_AID == 0, minValue, log2(obs_AID)))
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
# dTAG
obsexp$density <- get_density(obsexp$log_obs_UT, obsexp$log_obs_AID, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obs_AID_vs_UT_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_UT, temp$log_obs_AID, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_UT, y = log_obs_AID, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obs_AID_vs_UT_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
for(res in c(25, 10, 5)){
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp_", res, "kb.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# A485
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_A485, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0(res, "kb, log2(obs/exp), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_A485_vs_DMSO_", res, "kb")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
}
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
minValue <- -4
maxValue <- 5
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# A485
obsexp$density <- get_density(obsexp$log_obsexp_DMSO, obsexp$log_obsexp_A485, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_A485_vs_DMSO")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obsexp_DMSO, temp$log_obsexp_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs/exp), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_A485_vs_DMSO_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
minValue <- -4
maxValue <- 10
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))
# Importing loops
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_A485vsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
# dTAG
obsexp$density <- get_density(obsexp$log_obs_DMSO, obsexp$log_obs_A485, n = 100)
obsexp <- obsexp %>% dplyr::arrange(density)
g1 <- ggplot(obsexp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs)")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Visualize UP DOWN loops
temp <- obsexp %>% dplyr::filter(id %in% loop.down$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g2 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), down")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.no$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g3 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), no")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
temp <- obsexp %>% dplyr::filter(id %in% loop.up$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g4 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), up")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_A485_vs_DMSO_obs")
height <- 4
width <- 12
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g1, g4, g3, g2, ncol = 4))
dev.off()
# Structural
loop.str <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.str$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g5 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), structural loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
# Reg (PE-PE)
loop.reg <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe")) %>%
dplyr::mutate(binSize = V3 - V2,
id = paste(V1, binSize, V2, V5, sep = "_"))
temp <- obsexp %>% dplyr::filter(id %in% loop.reg$id)
temp$density <- get_density(temp$log_obs_DMSO, temp$log_obs_A485, n = 100)
temp <- temp %>% dplyr::arrange(density)
g6 <- ggplot(temp, aes(x = log_obs_DMSO, y = log_obs_A485, color = density)) +
geom_point() + theme_bw() + scale_color_viridis(guide = "none") +
geom_abline(slope = 1, intercept = 0) +
ggtitle(paste0("log2(obs), regulatory loops")) +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue))
fileName <- paste0("obsexp_A485_vs_DMSO_obs_str_vs_reg")
height <- 4
width <- 6
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(g5, g6, ncol = 2))
dev.off()
create_dist_vs_avgScore_perTreatment_oe <- function(data, figDir, name, loopList, colorList, se = FALSE){
avg_scores_long <- data %>%
group_by(distance, Anno2) %>%
summarise(avg_score = mean(score, na.rm = TRUE)) %>%
ungroup()
avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) +
geom_smooth(show.legend = TRUE, se = se) +
#ylim(0, 0.5) +
theme_classic() + scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = colorList) +
scale_fill_manual(values = colorList) +
labs(title = paste0(name),
x = "Distance",
y = "Average Score") +
theme(plot.title = element_text(size = 8))
fileName <- paste0("dist_vs_score_linePlot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 4, height = 3)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 4, height = 3)
print(p4)
dev.off()
}
create_dist_vs_avgDiffScore_perTreatment_oe <- function(data, figDir, name, loopList, colorList, se = FALSE){
avg_scores_long <- data %>%
group_by(distance, Anno2) %>%
summarise(avg_score = mean(score, na.rm = TRUE)) %>%
ungroup()
avg_scores_long$Anno2 <- factor(avg_scores_long$Anno2, level = loopList)
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno2, fill = Anno2)) +
geom_hline(yintercept = 0) +
geom_smooth(show.legend = TRUE, se = se) +
theme_classic() + scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = colorList) +
scale_fill_manual(values = colorList) +
labs(title = paste0(name),
x = "Distance",
y = "Average Diff Score") +
theme(plot.title = element_text(size = 8))
fileName <- paste0("dist_vs_score_difflinePlot_", name)
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = 4, height = 3)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 4, height = 3)
print(p4)
dev.off()
}
create_dist_barplot_oe <- function(data, figDir, name, note, loopList, diffCutoff, distanceFilter = 2*e6){
data <- data %>%
dplyr::mutate(distance = start2 - start1,
updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP",
ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
dplyr::filter(Anno2 %in% loopList,
distance < distanceFilter)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
dplyr::filter(Anno2 %in% loopList,
updown_dTAG_DMSO %in% c("UP", "NO", "DOWN"))
p <- ggplot(temp, aes(x = updown_dTAG_DMSO, y = distance)) +
geom_violin(aes(fill = updown_dTAG_DMSO)) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = updown_dTAG_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + ggtitle(note) +
scale_y_continuous(labels = label_kb_mb)
fileName <- paste0("size_barplot_", name, "_dTAG_vs_DMSO_", note, "_", diffCutoff)
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
temp <- data %>% dplyr::select(Anno2, distance, updown_dTAG_DMSO, updown_A485_DMSO) %>%
dplyr::filter(Anno2 %in% loopList,
updown_A485_DMSO %in% c("UP", "NO", "DOWN"))
p <- ggplot(temp, aes(x = updown_A485_DMSO, y = distance)) +
geom_violin(aes(fill = updown_A485_DMSO)) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = updown_A485_DMSO), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() + ggtitle(note) +
scale_y_continuous(labels = label_kb_mb)
fileName <- paste0("size_barplot_", name, "_A485_vs_DMSO_", note, "_", diffCutoff)
height <- 3
width <- 4
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
create_loop_scatterplot_oe <- function(data, figDir, name, Anno2List, diffCutoff){
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP",
ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
dplyr::filter(Anno2 %in% Anno2List)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
num.up <- (summary(data$updown_dTAG_DMSO))["UP"]
num.no <- (summary(data$updown_dTAG_DMSO))["NO"]
num.down <- (summary(data$updown_dTAG_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
### Scatterplot
minValue <- -4
maxValue <- 5
data$density <- get_density(data$log_obsexp_DMSO, data$log_obsexp_dTAG, n = 100)
data <- data %>% dplyr::arrange(density)
correlation <- cor(data$log_obsexp_DMSO, data$log_obsexp_dTAG)
p1 <- ggplot(data, aes(x = log_obsexp_DMSO, y = log_obsexp_dTAG, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
annotate("text", x = minValue, y = maxValue, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = minValue, y = maxValue-1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = minValue, y = maxValue-2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, size = 3) +
theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5)) +
annotate("text", x = minValue, y = maxValue, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
num.up <- (summary(data$updown_A485_DMSO))["UP"]
num.no <- (summary(data$updown_A485_DMSO))["NO"]
num.down <- (summary(data$updown_A485_DMSO))["DOWN"]
num.all <- num.up + num.no + num.down
perc.up <- round(num.up / num.all * 100, 2)
perc.no <- round(num.no / num.all * 100, 2)
perc.down <- round(num.down / num.all * 100, 2)
data$density <- get_density(data$log_obsexp_DMSO, data$log_obsexp_A485, n = 100)
data <- data %>% dplyr::arrange(density)
correlation <- cor(data$log_obsexp_DMSO, data$log_obsexp_A485)
p2 <- ggplot(data, aes(x = log_obsexp_DMSO, y = log_obsexp_A485, color = density)) +
geom_point(show.legend = FALSE) +
scale_color_viridis() +
coord_fixed(xlim = c(minValue, maxValue), ylim = c(minValue, maxValue)) +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_abline(slope = 1, intercept = -diffCutoff, col = "grey", linetype = "dotted") +
geom_abline(slope = 1, intercept = diffCutoff, col = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
annotate("text", x = minValue, y = maxValue, label = paste0("UP: ", num.up, " (", perc.up, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = minValue, y = maxValue-1, label = paste0("NO: ", num.no, " (", perc.no, "%)"),
color = "black", hjust = 0, size = 3) +
annotate("text", x = minValue, y = maxValue-2, label = paste0("DOWN: ", num.down, " (", perc.down, "%)"),
color = "black", hjust = 0, size = 3) +
theme_classic() + ggtitle(name) + theme(plot.title = element_text(size = 5)) +
annotate("text", x = minValue, y = maxValue, label = paste("r =", round(correlation, 2)), size = 5, color = "black")
fileName <- paste0("scatterplot_", name, "_", diffCutoff)
png(here(figDir, paste0(fileName, ".png")), res = 600, units = "in", width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = 5*1.5, height = 2.5*1.5)
print(cowplot::plot_grid(p1, p2, align = "h"))
dev.off()
}
make_diff_bedpe_oe <- function(data, name, Anno2List, outDir, diffCutoff){
data <- data %>%
dplyr::mutate(updown_dTAG_DMSO = ifelse(log_obsexp_diff_dTAG_DMSO > diffCutoff, "UP",
ifelse(log_obsexp_diff_dTAG_DMSO > -diffCutoff, "NO", "DOWN")),
updown_A485_DMSO = ifelse(log_obsexp_diff_A485_DMSO > diffCutoff, "UP",
ifelse(log_obsexp_diff_A485_DMSO > -diffCutoff, "NO", "DOWN"))) %>%
dplyr::filter(Anno2 %in% Anno2List)
data$updown_dTAG_DMSO <- factor(data$updown_dTAG_DMSO, levels = c("UP", "NO", "DOWN"))
data$updown_A485_DMSO <- factor(data$updown_A485_DMSO, levels = c("UP", "NO", "DOWN"))
out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_UP_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_NO_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_dTAG_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_dTAGvsDMSO_DOWN_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "UP") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_UP_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "NO") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_NO_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
out.temp <- data %>% dplyr::filter(updown_A485_DMSO == "DOWN") %>% dplyr::select(c(1, 2, 3, 4, 5, 6))
fwrite(out.temp, here(outDir, paste0(name, "_A485vsDMSO_DOWN_diff", diffCutoff, ".bedpe")),
sep = "\t", col.names = FALSE)
}
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))
# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_DMSO_logOE"), unique(data$Anno2), colorListLoop)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logOE"), unique(data$Anno2), colorListLoop)
# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO) %>%
dplyr::select(distance, log_obsexp_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logOE"), unique(data$Anno2), colorListLoop)
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))
temp <- data %>% dplyr::mutate(distance = start2 - start1,
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO) %>%
dplyr::select(distance, log_obsexp_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
temp <- temp %>% dplyr::mutate(Anno3 = case_when(Anno2 %in% c("S-X", "S-S") ~ "Structural",
Anno2 %in% c("P-P", "P-E", "E-E") ~ "Pure_Regulatory",
Anno2 %in% c("P-S", "P-X", "E-S", "E-X") ~ "Relaxed_Regulatory")) %>%
dplyr::filter(!is.na(Anno3))
loopList <- rev(c("Structural", "Relaxed_Regulatory", "Pure_Regulatory"))
colorList <- rev(c(palette_3[["grey2"]], strong_green, darken(strong_green, amount = 0.5)))
avg_scores_long <- temp %>%
group_by(distance, Anno3) %>%
summarise(avg_score = mean(score, na.rm = TRUE)) %>%
ungroup()
avg_scores_long$Anno3 <- factor(avg_scores_long$Anno3, level = loopList)
p4 <- ggplot(avg_scores_long, aes(x = distance, y = avg_score, color = Anno3)) +
geom_hline(yintercept = 0,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_smooth(show.legend = TRUE, se = TRUE,
size = lineThick*mmToLineUnit,
lineend = "square",
aes(fill = Anno3)) +
theme_classic() +
scale_x_continuous(labels = label_kb_mb) +
scale_color_manual(values = colorList) +
scale_fill_manual(values = colorList) +
labs(x = "Loop size",
y = "Δ log2(obs/exp)",
color = "Loop types", fill = "Loop types") +
theme(
axis.title = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +
guides(
color = guide_legend(
keywidth = 0.5, # Adjust the width of the legend color squares
keyheight = 0.5 # Adjust the height of the legend color squares
)
)
fileName <- paste0("dist_vs_score_difflinePlot_grouped", name)
width <- panelSize(2.5)*mmToInch
height <- panelSize(1.25)*mmToInch
png(here(figDir, paste0(fileName, ".png")),
res = 600, units = "in", width = width, height = height)
print(p4)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")),
width = width, height = height)
print(p4)
dev.off()
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obs_DMSO = if_else(obs_DMSO == 0, minValue, log2(obs_DMSO)),
log_obs_dTAG = if_else(obs_dTAG == 0, minValue, log2(obs_dTAG)),
log_obs_A485 = if_else(obs_A485 == 0, minValue, log2(obs_A485)))
# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))
# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obs_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_DMSO_logObs"), unique(data$Anno2), colorListLoop)
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obs_dTAG, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logObs"), unique(data$Anno2), colorListLoop)
# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
log_obs_diff_dTAG_DMSO = log_obs_dTAG - log_obs_DMSO) %>%
dplyr::select(distance, log_obs_diff_dTAG_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_dTAG_logObs"), unique(data$Anno2), colorListLoop)
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import obs/exp scores and merge to the dataset
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id"))
# Plot
temp <- data %>% dplyr::mutate(distance = start2 - start1) %>% dplyr::select(distance, log_obsexp_A485, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgScore_perTreatment_oe(temp, figDir, paste0(name, "_A485_logOE"), unique(data$Anno2), colorListLoop)
# Creating figures per each condition, differential
temp <- data %>% dplyr::mutate(distance = start2 - start1,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO) %>%
dplyr::select(distance, log_obsexp_diff_A485_DMSO, Anno2)
colnames(temp) <- c("distance", "score", "Anno2")
create_dist_vs_avgDiffScore_perTreatment_oe(temp, figDir, paste0(name, "_A485_logOE"), unique(data$Anno2), colorListLoop)
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import obs/exp scores and merge to the dataset
minValue = -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
diffCutoff <- 0.5
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "all"), unique(data$Anno2), diffCutoff)
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "reg"), c("P-P", "P-E", "E-E"), diffCutoff)
create_loop_scatterplot_oe(data, figDir, paste0(name, "_logOE_", "str"), c("S-S", "S-X"), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "all", unique(data$Anno2), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "reg", c("P-P", "P-E", "E-E"), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "reg_1mb", c("P-P", "P-E", "E-E"), diffCutoff, 1e6)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "str", c("S-S", "S-X"), diffCutoff)
create_dist_barplot_oe(data, figDir, paste0(name, "_logOE"), "str_1mb", c("S-S", "S-X"), diffCutoff, 1e6)
# Import annotation
name <- "chromo_cons_annoHierarchy"
data <- fread(here(consensusDir, paste0(name, ".tsv")))
# Import obs/exp scores and merge to the dataset
minValue = -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)))
# Merge dataset
data <- data %>% dplyr::left_join(obsexp, by = c("id")) %>%
dplyr::mutate(log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
diffCutoff <- 0.5
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "all"), unique(data$Anno2), consensusDir, diffCutoff)
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "pe-pe"), c("P-P", "P-E", "E-E"), consensusDir, diffCutoff)
make_diff_bedpe_oe(data, paste0(name, "_logOE_", "str"), c("S-S", "S-X"), consensusDir, diffCutoff)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
flankSize <- 1
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")
bed.1 <- gene.tb %>% dplyr::filter(ensembl %in% group1) %>% dplyr::select(c(1, 2, 3))
bed.2 <- gene.tb %>% dplyr::filter(ensembl %in% group2) %>% dplyr::select(c(1, 2, 3))
fwrite(bed.1, here(refDir, "TSS_binaryGroup1.bed"), sep = "\t", col.names = FALSE)
fwrite(bed.2, here(refDir, "TSS_binaryGroup2.bed"), sep = "\t", col.names = FALSE)
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed"))
bed.1 <- gene.tb %>% dplyr::filter(V6 %in% group1)
bed.2 <- gene.tb %>% dplyr::filter(V6 %in% group2)
fwrite(bed.1, here(refDir, "TSS_binaryGroup1_gtf.bed"), sep = "\t", col.names = FALSE)
fwrite(bed.2, here(refDir, "TSS_binaryGroup2_gtf.bed"), sep = "\t", col.names = FALSE)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
data <- fread(here(refDir, "readCount.filtered.TPM.all.tsv")) %>% dplyr::mutate(
group = case_when(ensembl %in% group1 ~ "group1",
ensembl %in% group2 ~ "group2",
TRUE ~ NA)
) %>%
dplyr::filter(!is.na(group)) %>%
dplyr::select(c(34, 1, 3, 4, 5, 14, 15, 34))
data <- data %>% mutate(average = rowMeans(across(colnames(data)[3:7])))
temp <- data %>% dplyr::select(group, average)
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$average
distance2 <- (data %>% dplyr::filter(group ==group2) )$average
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pval <- convPvalue(getPvalWilcox(temp, "group1", "group2"))
ggplot(temp, aes(x = group, y = average)) + geom_boxplot() + scale_y_log10() +
ylab("avgTPM") + theme_classic() +
annotate("text", x = 1, y = 10000, label = pval)
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
flankSize <- 1
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")
bed.1 <- gene.tb %>% dplyr::filter(ensembl %in% group1) %>% dplyr::select(c(1, 2, 3))
bed.2 <- gene.tb %>% dplyr::filter(ensembl %in% group2) %>% dplyr::select(c(1, 2, 3))
data <- fread(here("../..", "result", "stripenn", "result_filtered.tsv"))
ggplot(data, aes(x = width)) + geom_histogram() + theme_classic() +
scale_x_continuous(labels = label_kb_mb)
## Checking overlap with gene TSS
tss <- fread(here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"))
tss.group1 <- tss %>% dplyr::filter(V6 %in% group1) %>% dplyr::select(V1, V2, V3)
colnames(tss.group1) <- c("chr", "start", "end")
tss.group1.gr <- makeGRangesFromDataFrame(tss.group1)
tss.group2 <- tss %>% dplyr::filter(V6 %in% group2) %>% dplyr::select(V1, V2, V3)
colnames(tss.group2) <- c("chr", "start", "end")
tss.group2.gr <- makeGRangesFromDataFrame(tss.group2)
## Anchor of stripes
temp <- data %>% dplyr::select(chr, pos1, pos2)
colnames(temp) <- c("chr", "start", "end")
stripeAnchor.gr <- makeGRangesFromDataFrame(temp)
overlap.group1 <- findOverlaps(tss.group1.gr, stripeAnchor.gr)
overlap.group2 <- findOverlaps(tss.group2.gr, stripeAnchor.gr)
length(unique(queryHits(overlap.group1)))
length(unique(queryHits(overlap.group2)))
## body of stripes
temp <- data %>% dplyr::select(chr, pos3, pos4)
colnames(temp) <- c("chr", "start", "end")
stripeBody.gr <- makeGRangesFromDataFrame(temp)
overlap.group1 <- findOverlaps(tss.group1.gr, stripeBody.gr)
overlap.group2 <- findOverlaps(tss.group2.gr, stripeBody.gr)
length(unique(queryHits(overlap.group1)))
length(unique(queryHits(overlap.group2)))
## Checking RAD21/CTCF presence at anchor
overlap.body.ctcf <- findOverlaps(stripeBody.gr, peak.CTCF)
overlap.body.rad21 <- findOverlaps(stripeBody.gr, peak.RAD21)
overlap.anchor.ctcf <- findOverlaps(stripeAnchor.gr, peak.CTCF)
overlap.anchor.rad21 <- findOverlaps(stripeAnchor.gr, peak.RAD21)
length(unique(queryHits(overlap.body.ctcf)))
length(unique(queryHits(overlap.body.rad21)))
length(unique(queryHits(overlap.anchor.ctcf)))
length(unique(queryHits(overlap.anchor.rad21)))
## Checking overlap with loop anchor (PE-PE)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)
overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)
length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))
## Checking overlap with loop anchor (ALL)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)
overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)
length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))
## Checking overlap with loop anchor (Structure)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)
overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)
length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))
length(unique(subjectHits(overlap.body.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.body.down)))/length(anchor.down)*100
length(unique(subjectHits(overlap.anchor.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.anchor.down)))/length(anchor.down)*100
## Checking overlap with loop anchor (RelaxedReg)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_regulatory_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)
overlap.body.upno <- findOverlaps(stripeBody.gr, anchor.upno)
overlap.body.down <- findOverlaps(stripeBody.gr, anchor.down)
overlap.anchor.upno <- findOverlaps(stripeAnchor.gr, anchor.upno)
overlap.anchor.down <- findOverlaps(stripeAnchor.gr, anchor.down)
length(unique(subjectHits(overlap.body.upno)))
length(unique(subjectHits(overlap.body.down)))
length(unique(subjectHits(overlap.anchor.upno)))
length(unique(subjectHits(overlap.anchor.down)))
length(unique(subjectHits(overlap.body.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.body.down)))/length(anchor.down)*100
length(unique(subjectHits(overlap.anchor.upno)))/length(anchor.upno)*100
length(unique(subjectHits(overlap.anchor.down)))/length(anchor.down)*100
results <- tibble(i = numeric(), upno = numeric(), down = numeric())
for(i in seq(1, 10)){
data <- fread(here(refDir, "Dylan_hub_esc.csv"))
data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_all_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)
overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
overlap.down <- findOverlaps(anchor.down, hub.anchor)
n1 <- length(unique(queryHits(overlap.upno)))
n2 <- length(unique(queryHits(overlap.down)))
perc1 <- round(n1/length(overlap.upno)*100, 2)
perc2 <- round(n2/length(overlap.down)*100, 2)
results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}
results_long <- results %>%
pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")
results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))
# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "All loops",
x = "Presence of hub anchor with >i connections",
y = "Percentage"
) +
theme_minimal() +
scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
loop.upno <- (bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
getOverlapLoopNum <- function(loop, peak){
anchor1 <- GRanges(seqnames = loop$V1, ranges = IRanges(start = loop$V2, end = loop$V3))
anchor2 <- GRanges(seqnames = loop$V4, ranges = IRanges(start = loop$V5, end = loop$V6))
a <- queryHits(findOverlaps(anchor1, peak))
b <- queryHits(findOverlaps(anchor2, peak))
return(length(unique(c(a, b))))
}
getSEOverlapFisher <- function(allLoop, subsetLoop, peak){
all.overlap <- getOverlapLoopNum(allLoop, peak)
all.notOverlap <- nrow(allLoop) - all.overlap
subset.overlap <- getOverlapLoopNum(subsetLoop, peak)
subset.notOverlap <- nrow(subsetLoop) - subset.overlap
contingency_table <- matrix(c(subset.overlap, subset.notOverlap,
all.overlap, all.notOverlap), nrow = 2, byrow = TRUE)
colnames(contingency_table) <- c("Overlapping", "Not_Overlapping")
rownames(contingency_table) <- c("All loops", "Subset loops")
# Perform Fisher's Exact Test
fisher_test_result <- fisher.test(contingency_table)
return(fisher_test_result)
}
name <- "chromo_cons_annoHierarchy"
loop <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe.bedpe"))
i <- 2
data <- fread(here(refDir, "Dylan_hub_esc.csv"))
data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)
temp <- getSEOverlapFisher(loop, loop.upno, hub.anchor)
result.tb <- tibble(loopType = "UP/NO",
target = paste0("hub_", i),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, hub.anchor)
result.tb <- result.tb %>%
add_row(loopType = "DOWN",
target = paste0("hub_", i),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
for(i in c(5)){
data <- fread(here(refDir, "Dylan_hub_esc.csv"))
data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)
temp <- getSEOverlapFisher(loop, loop.upno, hub.anchor)
result.tb <- result.tb %>% add_row(loopType = "UP/NO",
target = paste0("hub_", i),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
temp <- getSEOverlapFisher(loop, loop.down, hub.anchor)
result.tb <- result.tb %>%
add_row(loopType = "DOWN",
target = paste0("hub_", i),
pvalue = temp$p.value,
oddsRatio = temp$estimate)
}
library(circlize)
data <- result.tb
heatmap_data <- data %>% dplyr::select(target, loopType, oddsRatio) %>%
pivot_wider(names_from = loopType, values_from = oddsRatio) %>%
column_to_rownames(var = "target")
pvalue_data <- data %>% dplyr::select(target, loopType, pvalue) %>%
pivot_wider(names_from = loopType, values_from = pvalue) %>%
column_to_rownames(var = "target")
# col_fun <- colorRamp2(c(0, 1, 2),
# c("blue", "white", "red"))
data$target <- factor(data$target, levels = c("hub_5", "hub_2"))
data$loopType <- factor(data$loopType, levels = c("UP/NO", "DOWN"))
p <- ggplot(data, aes(x = loopType, y = target, size = -log10(pvalue), fill = oddsRatio)) +
geom_point(shape = 21, # Ensures a point with an outline
stroke = 1*ptToMM # Line width for the border
) + theme_bw() +
scale_size_continuous(range = c(1, 3)) + # Set min and max point sizes here
scale_fill_gradientn(colors = c("#4852A0", "white", "#CB333A"), # Define gradient colors
values = scales::rescale(c(0.5, 1, 1.5)), limits = c(0.5, 1.5),
#low = "white", high = "#CB333A",
# limits = c(1, 3),
oob = scales::squish, # Define gradient colors
guide = guide_colorbar(
barwidth = 1.5/5.08, # Adjust width of the color bar
barheight = 15/5.08 # Adjust height of the color bar
)
) + labs(x = NULL, y = NULL) +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- here(figDir, "heatmap_hub_enrichment_dotplot_all")
width <- panelSize(1.65)*mmToInch
height <- panelSize(1.1)*mmToInch
# # png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
# # print(p)
# # dev.off()
svglite(paste0(fileName, ".svg"), height = height, width = width)
print(p)
dev.off()
results <- tibble(i = numeric(), upno = numeric(), down = numeric())
for(i in seq(1, 10)){
data <- fread(here(refDir, "Dylan_hub_esc.csv"))
data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)
overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
overlap.down <- findOverlaps(anchor.down, hub.anchor)
n1 <- length(unique(queryHits(overlap.upno)))
n2 <- length(unique(queryHits(overlap.down)))
perc1 <- round(n1/length(overlap.upno)*100, 2)
perc2 <- round(n2/length(overlap.down)*100, 2)
results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}
results_long <- results %>%
pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")
results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))
# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "PE-PE loops",
x = "Presence of hub anchor with >i connections",
y = "Percentage"
) +
theme_minimal() +
scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)
results <- tibble(i = numeric(), upno = numeric(), down = numeric())
for(i in seq(1, 10)){
data <- fread(here(refDir, "Dylan_hub_esc.csv"))
data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_UP_diff0.2.bedpe"))
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_NO_diff0.2.bedpe"))
anchor.upno <- extractAnchor(bind_rows(loop.up, loop.no))
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_structure_dTAGvsDMSO_DOWN_diff0.2.bedpe"))
anchor.down <- extractAnchor(loop.down)
overlap.upno <- findOverlaps(anchor.upno, hub.anchor)
overlap.down <- findOverlaps(anchor.down, hub.anchor)
n1 <- length(unique(queryHits(overlap.upno)))
n2 <- length(unique(queryHits(overlap.down)))
perc1 <- round(n1/length(overlap.upno)*100, 2)
perc2 <- round(n2/length(overlap.down)*100, 2)
results <- results %>% add_row(i = i, upno = perc1, down = perc2)
}
results_long <- results %>%
pivot_longer(cols = c(upno, down), names_to = "Group", values_to = "Percentage")
results_long$Group <- factor(results_long$Group, levels = c("upno", "down"))
# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Structure loops",
x = "Presence of hub anchor with >i connections",
y = "Percentage"
) +
theme_minimal() +
scale_fill_manual(values = c("upno" = "blue", "down" = "red")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylim(0, 100)
results <- tibble(i = numeric(), perc1 = numeric(), perc2 = numeric())
for(i in seq(1, 10)){
data <- fread(here(refDir, "Dylan_hub_episc.csv"))
data.hub <- data %>% dplyr::filter(all_lcon > i)
temp <- data.hub %>% dplyr::select(c(1, 2, 3))
colnames(temp) <- c("chr", "start", "end")
hub.anchor <- makeGRangesFromDataFrame(temp)
## Checking overlap with gene TSS
tss <- fread(here(refDir, "mm10_GRCm38.p6_TSS2.5kb.bed"))
tss.group1 <- tss %>% dplyr::filter(V6 %in% group1) %>% dplyr::select(V1, V2, V3)
colnames(tss.group1) <- c("chr", "start", "end")
tss.group1.gr <- makeGRangesFromDataFrame(tss.group1)
tss.group2 <- tss %>% dplyr::filter(V6 %in% group2) %>% dplyr::select(V1, V2, V3)
colnames(tss.group2) <- c("chr", "start", "end")
tss.group2.gr <- makeGRangesFromDataFrame(tss.group2)
overlap.group1 <- findOverlaps(tss.group1.gr, hub.anchor)
overlap.group2 <- findOverlaps(tss.group2.gr, hub.anchor)
n1 <- length(unique(queryHits(overlap.group1)))
n2 <- length(unique(queryHits(overlap.group2)))
perc1 <- round(n1/nrow(tss.group1)*100, 2)
perc2 <- round(n2/nrow(tss.group2)*100, 2)
results <- results %>% add_row(i = i, perc1 = perc1, perc2 = perc2)
}
results_long <- results %>%
pivot_longer(cols = c(perc1, perc2), names_to = "Group", values_to = "Percentage")
# Create the bar plot
ggplot(results_long, aes(x = factor(i), y = Percentage, fill = Group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Percentage by 'i' for perc1 and perc2",
x = "Presence of hub anchor with >i connections",
y = "Percentage"
) +
theme_minimal() +
scale_fill_manual(values = c("perc1" = "blue", "perc2" = "red")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
###[2.30] H3K27ac peak number per 10kb
process_group <- function(group_name, gene_data, bins) {
# Filter for the specific group
TSS_group <- makeGRangesFromDataFrame(
gene_data %>% filter(ensembl %in% get(group_name)),
keep.extra.columns = TRUE
)
# Process each TSS in the group
all_results <- map_dfr(seq_len(length(TSS_group)), function(i) {
# Find overlapping bins
overlapping_bins <- findOverlaps(TSS_group[i], bins)
bins_near_tss <- as_tibble(bins[subjectHits(overlapping_bins)])
# Calculate distance bin
TSScenter <- (as_tibble(TSS_group[i]) %>%
mutate(center = (start + end) / 2))$center
centerBinStart <- floor(TSScenter / 10e3) * 10e3 + 1
bins_near_tss <- bins_near_tss %>%
mutate(distanceBin = abs((start - centerBinStart)) / 10e3)
# Summarize results
result <- bins_near_tss %>%
group_by(distanceBin) %>%
summarise(mean_peak_counts = mean(peak_counts, na.rm = TRUE), .groups = "drop") %>%
mutate(gene = TSS_group[i]$ensembl)
return(result)
})
# Calculate mean and SD for the group
mean_data <- all_results %>%
group_by(distanceBin) %>%
summarise(mean_peak_counts = mean(mean_peak_counts, na.rm = TRUE))
sd_data <- all_results %>%
group_by(distanceBin) %>%
summarise(sd_peak_counts = sd(mean_peak_counts, na.rm = TRUE))
# Join and add group name
summary_data <- left_join(mean_data, sd_data, by = "distanceBin") %>%
mutate(group = group_name)
return(summary_data)
}
##########################################################
## Import peak
refDir <- here("../..", "reference")
#peak.H3K27ac <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
peak.H3K27ac <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
#peak.H3K27ac <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
## Import 10kb bin
temp <- fread(here(refDir, "mm10.bin.10kb.bed"))
colnames(temp) <- c("chr", "start", "end")
temp <- temp %>% dplyr::mutate(start = start+1)
bins.10kb <- makeGRangesFromDataFrame(temp)
## Count overlap
counts <- countOverlaps(bins.10kb, peak.H3K27ac)
mcols(bins.10kb)$peak_counts <- counts
## Getting TSS
flankSize <- 1e6
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V6)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl")
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
#TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
#TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)
##########################################################
# List of groups to process
groups <- c("gene.group1", "gene.group2")
# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))
# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
mutate(distanceBin = distanceBin + 1) %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2")
# Plot the results
p <- ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) +
geom_line(size = 0.5) +
labs(
x = "Distance bin (10kb)",
y = "H3K4me3 peak count"
) +
theme_classic() +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS),
legend.position = c(0.8, 0.8) # Moves legend inside the plot (x, y relative coordinates)
) +
guides(
fill = guide_legend(
keywidth = 0.2, # Adjust the width of the legend keys
keyheight = 0.2 # Adjust the height of the legend keys
))+
scale_color_manual(values = rev(c("#777777", "#F28E2C")))
##########################################################
fileName <- paste0("binPeakDensity_RAD21_binaryGroup")
width <- 32*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
# List of groups to process
groups <- c("psOver0", "psOver1", "psOver2", "psOver3", "psOver4")
# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))
# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
mutate(distanceBin = distanceBin + 1) %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver0") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver1") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver2") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver3")%>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "psOver4")
# Plot the results
p <- ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) +
geom_line(size = 0.5) +
labs(
x = "Distance bin (10kb)",
y = "H3K4me3 peak count"
) +
theme_classic() +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS),
legend.position = c(0.8, 0.8) # Moves legend inside the plot (x, y relative coordinates)
) +
guides(
fill = guide_legend(
keywidth = 0.2, # Adjust the width of the legend keys
keyheight = 0.2 # Adjust the height of the legend keys
))+
scale_color_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))
fileName <- paste0("binPeakDensity_RAD21_psGroup11")
width <- 32*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### PE
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
# List of groups to process
groups <- c("peOver0", "peOver1", "peOver2", "peOver3", "peOver4")
# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))
# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
mutate(distanceBin = distanceBin + 1) %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver0") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver1") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver2") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver3")%>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "peOver4")
# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) +
geom_line(size = 1) + xlim(0, 10)+
labs(
title = "Mean Peak Counts",
x = "Distance Bin (10 kb)",
y = "Mean Peak Counts"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 13)
)
###############
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
# List of groups to process
groups <- c("ppOver0", "ppOver1", "ppOver2", "ppOver3", "ppOver4")
# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))
# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
mutate(distanceBin = distanceBin + 1) %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver0") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver1") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver2") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver3")%>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "ppOver4")
# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) +
geom_line(size = 1) +
labs(
title = "Mean Peak Counts",
x = "Distance Bin (10 kb)",
y = "Mean Peak Counts"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 13)
)
peak.Whyte.SE <- importPeak(here(refDir, "superEnhancer_Whyte_ESC_mm10.bed"))
peak.Dylan.SE <- importPeak(here(refDir, "superEnhancer_Dylan_ESC.bed"))
flankSize <- 0.5e6
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Whyte.SE))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, peak.Whyte.SE))))/length(gene.group2)*100
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, peak.Dylan.SE))))/length(gene.group2)*100
#View(as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Whyte.SE)))]))
#View(as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE)))]))
gene.group1.SE <- (as_tibble(TSS1mb.group1.gr[unique(queryHits(findOverlaps(TSS1mb.group1.gr, peak.Dylan.SE)))]))$ensembl
gene.group1.noSE <- gene.group1[!(gene.group1 %in% gene.group1.SE)]
overlaps <- findOverlaps(TSS1mb.group2.gr, peak.Dylan.SE)
unique_hits <- unique(queryHits(overlaps))
gene.group2.SE <- as_tibble(TSS1mb.group2.gr[unique_hits])$ensembl
gene.group2.noSE <- as_tibble(TSS1mb.group2.gr[-unique_hits])$ensembl
##########################################################
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - 1e6,
TSSend = TSS + 1e6) %>%
dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")
##########################################################
# List of groups to process
groups <- c("gene.group1.SE", "gene.group1.noSE")
# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))
# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
mutate(distanceBin = distanceBin + 1) %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.SE") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.noSE")
# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) +
geom_line(size = 1) +
labs(
title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
x = "Distance Bin (10 kb)",
y = "Mean Peak Counts"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 13)
)
##########################################################
# List of groups to process
groups <- c("gene.group1.SE", "gene.group1.noSE", "gene.group2.SE", "gene.group2.noSE")
# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))
# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
mutate(distanceBin = distanceBin + 1) %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.SE") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group1.noSE") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.SE") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.noSE")
# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) +
geom_line(size = 1) +
labs(
title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
x = "Distance Bin (10 kb)",
y = "Mean Peak Counts"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 13)
)
##########################################################
# List of groups to process
groups <- c("gene.group2.SE", "gene.group2.noSE")
# Process each group and combine results
summary_data <- map_dfr(groups, ~ process_group(.x, gene.tb, bins.10kb))
# Add the additional rows for distanceBin = 0
summary_data <- summary_data %>%
mutate(distanceBin = distanceBin + 1) %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.SE") %>%
add_row(distanceBin = 0, mean_peak_counts = 0, sd_peak_counts = 0, group = "gene.group2.noSE")
# Plot the results
ggplot(summary_data, aes(x = distanceBin, y = mean_peak_counts, color = group)) +
geom_line(size = 1) +
labs(
title = paste0("Mean Peak Counts, +-", flankSize, "bp, Whyte SE"),
x = "Distance Bin (10 kb)",
y = "Mean Peak Counts"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 13)
)
## Checking P-S in the subset of genes
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
temp3 <- temp2 %>% dplyr::filter(gene %in% c(gene.group1)) %>%
dplyr::mutate(group = case_when(gene %in% gene.group1.SE ~ "SE",
TRUE ~ "noSE"))
ggplot(temp3, aes(x = group, y = num_ps)) + geom_violin(aes(fill = group)) + geom_boxplot(width = 0.1) + theme_classic()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$num_pp
distance2 <- (data %>% dplyr::filter(group ==group2) )$num_pp
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
getPvalWilcox(temp3, "noSE", "SE")
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff)%>%
dplyr::mutate(distance = start2 - start1)
geneAnnoData <- geneAnnoData %>% unnest(gene)
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V6, TSS)
colnames(gene.tb) <- c("ensembl", "TSS")
geneAnnoData <- geneAnnoData %>% dplyr::left_join(gene.tb, by = c("gene" = "ensembl"))
data <- geneAnnoData %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 29, 31, 35)) %>%
rowwise() %>%
dplyr::mutate(distToAnchor1 = (start1 + end1)/2 - TSS,
distToAnchor2 = (start2 + end2)/2 - TSS,
distToAnchor = if_else(abs(distToAnchor1) > abs(distToAnchor2), distToAnchor1, distToAnchor2),
direction = if_else(distToAnchor > 0, "right", "left"))
data.directionality <- data %>% group_by(gene) %>%
summarise(count = n(),
n_right = sum(direction == "right"),
n_left = sum(direction == "left")) %>%
dplyr::filter(count > 1) %>%
dplyr::mutate(n = n_right + n_left,
directionality = abs((n_right - n_left)/n))
#ggplot(data.directionality, aes(x = directionality)) + geom_histogram() + theme_classic()
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
temp <- data.directionality %>% dplyr::filter(gene %in% c(gene.group1, gene.group2)) %>%
dplyr::mutate(group = if_else(gene %in% gene.group1, "group1", "group2"))
ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) +
geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-S") +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
#########################PS
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$directionality
distance2 <- (data %>% dplyr::filter(group ==group2) )$directionality
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
#####################
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
temp <- data.directionality %>% dplyr::filter(gene %in% c(psOver0, psOver1, psOver2, psOver3, psOver4)) %>%
dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
gene %in% psOver1 ~ "psOver1",
gene %in% psOver2 ~ "psOver2",
gene %in% psOver3 ~ "psOver3",
gene %in% psOver4 ~ "psOver4"))
p34 <- round(getPvalWilcox(temp, "psOver3", "psOver4"), 5)
p23 <- round(getPvalWilcox(temp, "psOver2", "psOver3"), 5)
p12 <- round(getPvalWilcox(temp, "psOver1", "psOver2"), 5)
p01 <- round(getPvalWilcox(temp, "psOver0", "psOver1"), 5)
p04 <- round(getPvalWilcox(temp, "psOver0", "psOver4"), 5)
ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) +
geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
"p23: ", convPvalue(p23), "\n",
"p12: ", convPvalue(p12), "\n",
"p01: ", convPvalue(p01), "\n",
"p04: ", convPvalue(p04), "\n"),
color = "black", hjust = 0, size = 3)
#################### pe
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
temp <- data.directionality %>% dplyr::filter(gene %in% c(peOver0, peOver1, peOver2, peOver3, peOver4)) %>%
dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
gene %in% peOver1 ~ "peOver1",
gene %in% peOver2 ~ "peOver2",
gene %in% peOver3 ~ "peOver3",
gene %in% peOver4 ~ "peOver4"))
p34 <- round(getPvalWilcox(temp, "peOver3", "peOver4"), 5)
p23 <- round(getPvalWilcox(temp, "peOver2", "peOver3"), 5)
p12 <- round(getPvalWilcox(temp, "peOver1", "peOver2"), 5)
p01 <- round(getPvalWilcox(temp, "peOver0", "peOver1"), 5)
p04 <- round(getPvalWilcox(temp, "peOver0", "peOver4"), 5)
ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) +
geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
"p23: ", convPvalue(p23), "\n",
"p12: ", convPvalue(p12), "\n",
"p01: ", convPvalue(p01), "\n",
"p04: ", convPvalue(p04), "\n"),
color = "black", hjust = 0, size = 3)
#################### pp
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
temp <- data.directionality %>% dplyr::filter(gene %in% c(ppOver0, ppOver1, ppOver2, ppOver3, ppOver4)) %>%
dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
gene %in% ppOver1 ~ "ppOver1",
gene %in% ppOver2 ~ "ppOver2",
gene %in% ppOver3 ~ "ppOver3",
gene %in% ppOver4 ~ "ppOver4"))
p34 <- round(getPvalWilcox(temp, "ppOver3", "ppOver4"), 5)
p23 <- round(getPvalWilcox(temp, "ppOver2", "ppOver3"), 5)
p12 <- round(getPvalWilcox(temp, "ppOver1", "ppOver2"), 5)
p01 <- round(getPvalWilcox(temp, "ppOver0", "ppOver1"), 5)
p04 <- round(getPvalWilcox(temp, "ppOver0", "ppOver4"), 5)
ggplot(temp, aes(x = group, y = directionality)) + geom_violin() + geom_boxplot(width = 0.1) +
geom_hline(yintercept = 0) + theme_classic() + ggtitle("P-N") +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
annotate("text", x = 1, y = 0.5, label = paste0("p34: ", convPvalue(p34), "\n",
"p23: ", convPvalue(p23), "\n",
"p12: ", convPvalue(p12), "\n",
"p01: ", convPvalue(p01), "\n",
"p04: ", convPvalue(p04), "\n"),
color = "black", hjust = 0, size = 3)
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff)%>%
dplyr::mutate(distance = start2 - start1)
geneAnnoData <- geneAnnoData %>% unnest(gene)
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V6, TSS)
colnames(gene.tb) <- c("ensembl", "TSS")
geneAnnoData <- geneAnnoData %>% dplyr::left_join(gene.tb, by = c("gene" = "ensembl"))
data <- geneAnnoData %>% dplyr::select(c(1, 2, 3, 4, 5, 6, 29, 31, 35)) %>%
rowwise() %>%
dplyr::mutate(distToAnchor1 = (start1 + end1)/2 - TSS,
distToAnchor2 = (start2 + end2)/2 - TSS,
distToAnchor = if_else(abs(distToAnchor1) > abs(distToAnchor2), distToAnchor1, distToAnchor2),
direction = if_else(distToAnchor > 0, "right", "left"))
result <- data %>%
group_by(gene, direction) %>%
mutate(distToAnchor = abs(distToAnchor)) %>%
mutate(max_dist_P_S = ifelse(any(Anno2 == "P-S"),
max(distToAnchor[Anno2 == "P-S"], na.rm = TRUE),
NA)) %>%
mutate(is_smaller = ifelse(is.na(max_dist_P_S), NA,
ifelse(distToAnchor == max_dist_P_S, NA, distToAnchor < max_dist_P_S))) %>%
ungroup()
result2 <- result %>% dplyr::filter(!is.na(is_smaller))
result3 <- result2 %>% group_by(gene, direction) %>%
summarize(count_true = sum(is_smaller, na.rm = TRUE),
count_false = sum(is_smaller == FALSE, na.rm = TRUE)) %>%
dplyr::mutate(count = count_true + count_false,
perc = count_true/count*100) %>%
dplyr::select(gene, direction, perc)
result3 <- result3 %>% group_by(gene) %>% summarize(perc = mean(perc))
####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
gene %in% psOver1 ~ "psOver1",
gene %in% psOver2 ~ "psOver2",
gene %in% psOver3 ~ "psOver3",
gene %in% psOver4 ~ "psOver4",
TRUE ~ NA)) %>%
dplyr::filter(!is.na(group))
ggplot(result4, aes(x = group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
####### DOing this for P-E groups
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
gene %in% peOver1 ~ "peOver1",
gene %in% peOver2 ~ "peOver2",
gene %in% peOver3 ~ "peOver3",
gene %in% peOver4 ~ "peOver4",
TRUE ~ NA)) %>%
dplyr::filter(!is.na(group))
ggplot(result4, aes(x = group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
####### DOing this for P-P groups
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
gene %in% ppOver1 ~ "ppOver1",
gene %in% ppOver2 ~ "ppOver2",
gene %in% ppOver3 ~ "ppOver3",
gene %in% ppOver4 ~ "ppOver4",
TRUE ~ NA)) %>%
dplyr::filter(!is.na(group))
ggplot(result4, aes(x = group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
result <- data %>%
group_by(gene, direction) %>%
mutate(distToAnchor = abs(distToAnchor)) %>%
mutate(max_dist_P_S = ifelse(any(Anno2 == "P-S"),
max(distToAnchor[Anno2 == "P-S"], na.rm = TRUE),
NA))
result <- result %>% dplyr::filter(Anno2 != "P-S") %>%
mutate(is_smaller = ifelse(is.na(max_dist_P_S), NA,
ifelse(distToAnchor == max_dist_P_S, NA, distToAnchor < max_dist_P_S))) %>%
ungroup()
result2 <- result %>% dplyr::filter(!is.na(is_smaller))
result3 <- result2 %>% group_by(gene, direction) %>%
summarize(count_true = sum(is_smaller, na.rm = TRUE),
count_false = sum(is_smaller == FALSE, na.rm = TRUE)) %>%
dplyr::mutate(count = count_true + count_false,
perc = count_true/count*100) %>%
dplyr::select(gene, direction, perc)
result3 <- result3 %>% group_by(gene) %>% summarize(perc = mean(perc))
####### DOing this for P-S groups
resultDir <- here("../../result")
temp2 <- readRDS(here(resultDir, "gene_loop_link.rds"))
psOver4 <- (temp2 %>% dplyr::filter(num_ps >= 4))$gene
psOver3 <- (temp2 %>% dplyr::filter(num_ps >= 3, num_ps < 4))$gene
psOver2 <- (temp2 %>% dplyr::filter(num_ps >= 2, num_ps < 3))$gene
psOver1 <- (temp2 %>% dplyr::filter(num_ps >= 1, num_ps < 2))$gene
psOver0 <- (temp2 %>% dplyr::filter(num_ps < 1))$gene
result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% psOver0 ~ "psOver0",
gene %in% psOver1 ~ "psOver1",
gene %in% psOver2 ~ "psOver2",
gene %in% psOver3 ~ "psOver3",
gene %in% psOver4 ~ "psOver4",
TRUE ~ NA)) %>%
dplyr::filter(!is.na(group))
ggplot(result4, aes(x = group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
####### DOing this for P-E groups
peOver4 <- (temp2 %>% dplyr::filter(num_pe >= 4))$gene
peOver3 <- (temp2 %>% dplyr::filter(num_pe >= 3, num_pe < 4))$gene
peOver2 <- (temp2 %>% dplyr::filter(num_pe >= 2, num_pe < 3))$gene
peOver1 <- (temp2 %>% dplyr::filter(num_pe >= 1, num_pe < 2))$gene
peOver0 <- (temp2 %>% dplyr::filter(num_pe < 1))$gene
result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% peOver0 ~ "peOver0",
gene %in% peOver1 ~ "peOver1",
gene %in% peOver2 ~ "peOver2",
gene %in% peOver3 ~ "peOver3",
gene %in% peOver4 ~ "peOver4",
TRUE ~ NA)) %>%
dplyr::filter(!is.na(group))
ggplot(result4, aes(x = group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
####### DOing this for P-P groups
ppOver4 <- (temp2 %>% dplyr::filter(num_pp >= 4))$gene
ppOver3 <- (temp2 %>% dplyr::filter(num_pp >= 3, num_pp < 4))$gene
ppOver2 <- (temp2 %>% dplyr::filter(num_pp >= 2, num_pp < 3))$gene
ppOver1 <- (temp2 %>% dplyr::filter(num_pp >= 1, num_pp < 2))$gene
ppOver0 <- (temp2 %>% dplyr::filter(num_pp < 1))$gene
result4 <- result3 %>% dplyr::mutate(group = case_when(gene %in% ppOver0 ~ "ppOver0",
gene %in% ppOver1 ~ "ppOver1",
gene %in% ppOver2 ~ "ppOver2",
gene %in% ppOver3 ~ "ppOver3",
gene %in% ppOver4 ~ "ppOver4",
TRUE ~ NA)) %>%
dplyr::filter(!is.na(group))
ggplot(result4, aes(x = group, y = perc)) + geom_violin() + geom_boxplot(width = 0.05) + theme_classic() +
stat_summary(aes(group = group), fun = mean, geom = "point", shape = 21, size = 2, fill = "red", color = "black")
peak.CTCF <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
library(BSgenome.Mmusculus.UCSC.mm10)
# Extract sequences
sequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10, peak.CTCF)
# Get the CTCF motif (or provide your own PWM)
ctcf_motif <- query(MotifDb, c("CTCF", "Mmusculus"))[[1]]
# Scan for the motif
motif_hits <- lapply(sequences, function(seq) {
matchPWM(ctcf_motif, seq, min.score = "80%") # Adjust min.score as needed
})
motif_hits_reverse <- lapply(sequences, function(seq) {
matchPWM(ctcf_motif, reverseComplement(seq), min.score = "80%")
})
# Combine results
result <- mapply(function(fwd, rev) {
list(forward = fwd, reverse = rev)
}, motif_hits, motif_hits_reverse, SIMPLIFY = FALSE)
result[[5]]
result_tibble <- tibble(
peak_id = seq_along(result), # Create an identifier for each peak
alignment = map_chr(result, function(res) {
has_fwd <- length(res$forward) > 0
has_rev <- length(res$reverse) > 0
if (has_fwd && has_rev) "fwdrev"
else if (has_fwd) "fwd"
else if (has_rev) "rev"
else "none"
})
)
temp <- as_tibble(peak.CTCF) %>% bind_cols(result_tibble) %>%
dplyr::select(seqnames, start, end, alignment)
fwrite(temp, here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"), col.names = FALSE, sep = "\t")
ctcf.peak <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcf.peak) <- c("chr", "start", "end", "motif")
ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak, keep.extra.columns = TRUE)
flankSize <- 1
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")
tss.gr <- makeGRangesFromDataFrame(gene.TSS.tb, keep.extra.columns = TRUE)
fwd_peaks <- ctcf.peak.gr[ctcf.peak.gr$motif %in% c("fwd", "fwdrev")]
rev_peaks <- ctcf.peak.gr[ctcf.peak.gr$motif %in% c("rev", "fwdrev")]
# Find convergent CTCF pairs for each TSS
convergent_pairs <- lapply(seq_along(tss.gr), function(i) {
current_tss <- tss.gr[i]
# Filter peaks on the same chromosome as the TSS
fwd_peaks_chr <- fwd_peaks[as.character(seqnames(fwd_peaks)) == as.character(seqnames(current_tss))]
rev_peaks_chr <- rev_peaks[as.character(seqnames(rev_peaks)) == as.character(seqnames(current_tss))]
# Find the closest forward peak to the left of the TSS
left_fwd <- fwd_peaks_chr[start(fwd_peaks_chr) < start(current_tss)]
closest_fwd <- if (length(left_fwd) > 0) {
left_fwd[which.max(start(left_fwd))]
} else {
NA
}
# Find the closest reverse peak to the right of the TSS
right_rev <- rev_peaks_chr[start(rev_peaks_chr) > start(current_tss)]
closest_rev <- if (length(right_rev) > 0) {
right_rev[which.min(start(right_rev))]
} else {
NA
}
# Combine results if both closest peaks exist
if (!is.na(closest_fwd) && !is.na(closest_rev)) {
tibble(
chrom1 = as.character(seqnames(closest_fwd)),
start1 = start(closest_fwd),
end1 = end(closest_fwd),
chrom2 = as.character(seqnames(closest_rev)),
start2 = start(closest_rev),
end2 = end(closest_rev),
ensembl = current_tss$ensembl,
)
} else {
NULL
}
})
# Combine results into a tibble
result_tibble <- bind_rows(convergent_pairs)
fwrite(result_tibble, here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"), sep = "\t", col.names = FALSE)
fwrite(gene.TSS.tb %>% dplyr::select(c(1, 2, 3, 5)), here(refDir, "tss_2bp.bed"), sep = "\t", col.names = FALSE)
temp <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe")) %>% dplyr::filter(V7 == "Klf4")
fwrite(as_tibble(temp), here(refDir, "tss_convergent_ctcf_motif_pairs_klf4.bedpe"), sep = "\t", col.names = FALSE)
# Checking size distribution of ctcf pair
temp <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"))
temp <- temp %>% dplyr::mutate(size = (V6 + V5)/2 - (V3 + V2)/2,
id = paste(V1, V2, V3, V5, V6, sep = "_"))
temp <- temp %>% dplyr::select(id, size) %>% distinct()
ggplot(temp, aes(x = size)) + geom_histogram() + scale_x_continuous(labels = label_kb_mb, limits = c(0, 1e6)) + scale_y_log10() + theme_classic() + ggtitle("size distribution of smallest convergent ctcf pairs around gene")
flankSize <- 1
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V4, V5, V6)
colnames(gene.TSS.tb) <- c("chr", "start", "end", "strand", "gene", "ensembl")
geneEnsemblPair <- gene.TSS.tb %>% dplyr::select(gene, ensembl)
boundary.pair <- fread(here(refDir, "tss_convergent_ctcf_motif_pairs.bedpe"))
colnames(boundary.pair) <- c("boundary_chrom1", "boundary_start1", "boundary_end1", "boundary_chrom2", "boundary_start2", "boundary_end2", "ensembl")
boundary.pair <- boundary.pair %>% dplyr::left_join(geneEnsemblPair, by = c("ensembl"))
# Importing loop gene annotation
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E")) %>%
dplyr::select(c(1, 2, 3, 4, 5, 6, 11, 12, 24, 29, 31)) %>% unnest(gene)
data <- geneAnnoData %>% dplyr::left_join(boundary.pair, by = c("gene" = "ensembl"))
data <- data %>% rowwise() %>% dplyr::mutate(isWithinLeftBd = (boundary_start1 <= min(end1, end2)),
isWithinRightBd = (boundary_end2 >= max(start1, start2)),
isWithinBd = isWithinLeftBd & isWithinRightBd)
data <- data %>% drop_na()
ggplot(data, aes(x = DMSO, y = dTAG)) + geom_point() + coord_fixed() +
geom_abline(slope = 1, intercept = 0, col = "grey50", linetype = "dashed") +
geom_hline(yintercept = 0, alpha = 0.5, color = "grey") +
geom_vline(xintercept = 0, alpha = 0.5, color = "grey") +
theme_classic() + facet_wrap(~ isWithinBd)
ggplot(data, aes(x = isWithinBd, y = diff_dTAG_DMSO, fill = isWithinBd)) + geom_violin() + geom_boxplot(width = 0.1) + theme_classic() +
geom_hline(yintercept = 0)
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(isWithinBd ==group1) )$diff_dTAG_DMSO
distance2 <- (data %>% dplyr::filter(isWithinBd ==group2) )$diff_dTAG_DMSO
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
getPvalWilcox(data, TRUE, FALSE)
# Proportion of those gene between group1 and group2
data.isWithinBd <- data %>% dplyr::filter(isWithinBd)
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
sum(gene.group1 %in% data.isWithinBd$gene)/length(gene.group1)*100
sum(gene.group2 %in% data.isWithinBd$gene)/length(gene.group2)*100
# Checking size of those loops
data <- data %>% dplyr::mutate(distance = start2 - start1)
ggplot(data, aes(x = isWithinBd, y = distance, fill = isWithinBd)) + geom_violin() + geom_boxplot(width = 0.1) + theme_classic()
flankSize <- 2.5e3
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chr", "start", "end", "ensembl", "gene")
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
TSS1mb.group1.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group1), keep.extra.columns = TRUE)
TSS1mb.group2.gr <- makeGRangesFromDataFrame(gene.tb %>% dplyr::filter(ensembl %in% gene.group2), keep.extra.columns = TRUE)
ctcf.peak <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcf.peak) <- c("chr", "start", "end", "motif")
ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak)
print("+-2.5kb TSS overla with CTCF peak")
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, ctcf.peak.gr))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, ctcf.peak.gr))))/length(gene.group2)*100
ctcf.peak.gr <- makeGRangesFromDataFrame(ctcf.peak %>% dplyr::filter(motif != "none"))
print("+-2.5kb TSS overla with CTCF peak with motif")
length(unique(queryHits(findOverlaps(TSS1mb.group1.gr, ctcf.peak.gr))))/length(gene.group1)*100
length(unique(queryHits(findOverlaps(TSS1mb.group2.gr, ctcf.peak.gr))))/length(gene.group2)*100
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X")) %>%
dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 11, 12, 24, 29, 31)) %>% unnest(gene)
gene.TSS.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3)) %>%
dplyr::select(V6, TSS)
colnames(gene.TSS.tb) <- c("ensembl", "TSS")
geneAnnoData <- geneAnnoData %>% dplyr::left_join(gene.TSS.tb, by = c("gene" = "ensembl"))
geneAnnoData <- geneAnnoData %>%
dplyr::mutate(distance1 = (start1 + end1)/2 - TSS,
distance2 = (start2 + end2)/2 - TSS,
distance = if_else(abs(distance1) > abs(distance2), distance1, distance2))
result <- geneAnnoData %>%
filter(Anno2 == "P-S") %>%
group_by(gene) %>%
summarise(
farRight = if (any(distance > 0)) max(distance[distance > 0]) else 0, # Return 0 if no positive distance
farLeft = if (any(distance < 0)) min(distance[distance < 0]) else 0 # Return 0 if no negative distance
) %>%
ungroup()
geneAnnoData <- geneAnnoData %>% left_join(result, by = c("gene"))
geneAnnoData <- geneAnnoData %>% dplyr::filter(Anno2 %in% c("P-P", "P-E")) %>%
dplyr::mutate(group = case_when(is.na(farRight) ~ "No",
(distance > farLeft) & (distance < farRight) ~ "Within",
TRUE ~ "Outside"),
size = start2 - start1)
data <- geneAnnoData %>% dplyr::select(group, id, size) %>% distinct()
anchor.data <- geneAnnoData %>% dplyr::select(chrom1, start1, end1, chrom2, start2, end2, group) %>% distinct()
temp1 <- anchor.data %>% dplyr::filter(group == "No") %>% dplyr::select(-group)
fwrite(temp1, here(consensusDir, "insulated_domain_ps_no.bedpe"), sep = "\t", col.names = FALSE)
temp2 <- anchor.data %>% dplyr::filter(group == "Within") %>% dplyr::select(-group)
fwrite(temp2, here(consensusDir, "insulated_domain_ps_within.bedpe"), sep = "\t", col.names = FALSE)
temp3 <- anchor.data %>% dplyr::filter(group == "Outside") %>% dplyr::select(-group)
fwrite(temp3, here(consensusDir, "insulated_domain_ps_outside.bedpe"), sep = "\t", col.names = FALSE)
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$size
distance2 <- (data %>% dplyr::filter(group ==group2) )$size
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))
p <- ggplot(data, aes(x = group, y = size, fill = group)) +
geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() + scale_y_continuous(labels = label_kb_mb) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "Loop size")
fileName <- paste0("insulationBoundary_size")
width <- panelSize(1.5)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
########################
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$diff_dTAG_DMSO
distance2 <- (data %>% dplyr::filter(group ==group2) )$diff_dTAG_DMSO
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
data <- geneAnnoData %>% dplyr::select(group, id, diff_dTAG_DMSO) %>% distinct()
# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))
p <- ggplot(data, aes(x = group, y = diff_dTAG_DMSO, fill = group)) +
geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "Δ loop score") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_hline(yintercept = -0.2,
alpha = 0.5,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square", linetype = "dashed") +
coord_cartesian(ylim = c(-0.8, 0.5))
fileName <- paste0("insulationBoundary_delta")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
######################
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
geneAnnoData <- geneAnnoData %>% left_join(obsexp, by = c("id"))
data <- geneAnnoData %>% dplyr::select(group, id, log_obsexp_diff_dTAG_DMSO) %>% distinct()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$log_obsexp_diff_dTAG_DMSO
distance2 <- (data %>% dplyr::filter(group ==group2) )$log_obsexp_diff_dTAG_DMSO
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
# convPvalue(getPvalWilcox(data, "noBoundary", "withinBoundary"))
# convPvalue(getPvalWilcox(data, "noBoundary", "outsideBoundary"))
# convPvalue(getPvalWilcox(data, "outsideBoundary", "withinBoundary"))
#
p <- ggplot(data, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) +
geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
scale_fill_manual(values = c("grey50", "#5EC962", "#5EC962")) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "log2(fc of obs/exp)") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
coord_cartesian(ylim = c(-2, 2))
fileName <- paste0("insulationBoundary_obsexp")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#####################
gene.withinBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "withinBoundary"))$gene)
gene.outsideBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "outsideBoundary"))$gene)
gene.noBoundary <- unique((geneAnnoData %>% dplyr::filter(group == "noBoundary"))$gene)
gene.wBoundary <- unique(c(gene.withinBoundary, gene.outsideBoundary))
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
print("perc of gene with loop within Boundary")
sum(gene.group1 %in% gene.withinBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.withinBoundary)/length(gene.group2)*100
print("perc of gene with loop outside Boundary")
sum(gene.group1 %in% gene.outsideBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.outsideBoundary)/length(gene.group2)*100
print("perc of gene with loop with no Boundary")
sum(gene.group1 %in% gene.noBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.noBoundary)/length(gene.group2)*100
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name) %>%
dplyr::mutate(group = case_when(
ensembl_gene_id %in% gene.wBoundary ~ "withinBoundary",
ensembl_gene_id %in% gene.noBoundary ~ "noBoundary",
TRUE ~ NA
)) %>%
dplyr::filter(!is.na(group)) %>%
dplyr::mutate(absLog2FC = abs(log2FoldChange))
ggplot(diff.RNA, aes(x = group, y = log2FoldChange)) + geom_violin() + geom_boxplot(width = 0.1)
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$absLog2FC
distance2 <- (data %>% dplyr::filter(group ==group2) )$absLog2FC
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
convPvalue(getPvalWilcox(diff.RNA, "withinBoundary", "noBoundary"))
# Create the CDF plot
p <- ggplot(diff.RNA, aes(x = absLog2FC, color = group)) +
stat_ecdf(size = 0.4 ) + # Use stat_ecdf to plot the empirical CDF
labs(
x = "Absolute log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 0.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("log2FC_cdf_insulationBoundary")
width <- panelSize(2.5)*mmToInch
height <- panelSize(1.5)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
### Checking characterstics of anchors
temp1 <- anchor.data %>% dplyr::filter(group == "noBoundary") %>% dplyr::select(-group)
anchor.noBoundary <- extractAnchor(temp1)
temp2 <- anchor.data %>% dplyr::filter(group == "withinBoundary") %>% dplyr::select(-group)
anchor.withinBoundary <- extractAnchor(temp2)
temp3 <- anchor.data %>% dplyr::filter(group == "outsideBoundary") %>% dplyr::select(-group)
anchor.outsideBoundary <- extractAnchor(temp3)
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(loop ==group1) )$sumScore
distance2 <- (data %>% dplyr::filter(loop ==group2) )$sumScore
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
getSumScores <- function(track, anchor) {
# Find overlaps between all anchors and track regions at once
overlaps <- findOverlaps(anchor, track)
# Extract the scores and corresponding anchor indices
anchor_indices <- queryHits(overlaps)
track_scores <- score(track)[subjectHits(overlaps)]
# Use tapply to calculate the median scores for each anchor
median_scores <- tapply(track_scores, anchor_indices, mean, na.rm = TRUE)
# Initialize a numeric vector to store the median scores for each anchor
all_median_scores <- rep(NA, length(anchor))
# Populate the median scores for the anchors that have overlaps
all_median_scores[as.numeric(names(median_scores))] <- median_scores
return(all_median_scores)
}
plotSumScoresBinary <- function(track, peak, name, anchor1, anchor2, anchor3){
peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
a <- getSumScores(peakTrack, anchor1)
b <- getSumScores(peakTrack, anchor2)
c <- getSumScores(peakTrack, anchor3)
a.tb <- tibble(loop = "noBoundary",
sumScore = a)
b.tb <- tibble(loop = "withBoundary",
sumScore = b)
c.tb <- tibble(loop = "outsideBoundary",
sumScore = c)
data <- bind_rows(a.tb, b.tb, c.tb) %>% drop_na()
data$loop <- factor(data$loop, levels = c("noBoundary", "withBoundary", "outsideBoundary"))
p12 <- getPvalWilcox(data, "noBoundary", "withBoundary")
p13 <- getPvalWilcox(data, "noBoundary", "outsideBoundary")
p23 <- getPvalWilcox(data, "withBoundary", "outsideBoundary")
p <- ggplot(data, aes(x = loop, y = sumScore)) +
labs(x = NULL, y = paste0(name, "\nlog(avg peak score per anchor)")) +
geom_violin(aes(fill = loop), color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.size = 1, outlier.stroke = NA) + theme_classic() +
stat_summary(
aes(group = loop), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) + theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +
#coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.99))) +
annotate(
"text", x = 1, y = quantile(data$sumScore, 0.5),
label = paste0("p12: ", convPvalue(p12), "\n",
"p13: ", convPvalue(p13), "\n",
"p23: ", convPvalue(p23)),
color = "black", hjust = 0, size = 3
)
#+ scale_y_continuous(trans = "log10")
fileName <- paste0("ChIP_peak_avgPeakScore_boundaryAnchor_", name)
width <- panelSize(2)*mmToInch
height <- panelSize(2)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# Sum peak score
####
track <- import(here(refDir, "33255_H3K4me3_04-745_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33255_H3K4me3_04-745_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "H3K4me3", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)
track <- import(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2438476_EC-DG-3458-H3K27AC_ASYN_1.narrowPeak.bed"))
#plotSumScores(track, peak, "H3K27ac")
plotSumScoresBinary(track, peak, "H3K27ac", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)
track <- import(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.black.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "GSM2683440_J1_H3K14ac_mm10Lifted.bed"))
#plotSumScores(track, peak, "H3K14ac")
plotSumScoresBinary(track, peak, "H3K14ac", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)
track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "CTCF")
plotSumScoresBinary(track, peak, "CTCF", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)
track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
#plotSumScores(track, peak, "RAD21")
plotSumScoresBinary(track, peak, "RAD21", anchor.noBoundary, anchor.withinBoundary, anchor.outsideBoundary)
######## LOLA
atac <- fread(here(refDir, "GSM3106257_ATAC_ESC_1.bed")) %>% dplyr::select(V1, V2, V3)
colnames(atac) <- c("chr", "start", "end")
atac.gr <- makeGRangesFromDataFrame(atac)
# LOADING LOOPS
loop.all <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>% dplyr::filter(Anno2 %in% c("P-P", "P-E", "E-E"))
anchor.all <- (extractAnchor(loop.all))
overlaps <- findOverlaps(anchor.all, atac.gr)
anchor.all <- pintersect(anchor.all[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
overlaps <- findOverlaps(anchor.noBoundary, atac.gr)
anchor.noBoundary <- pintersect(anchor.noBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
overlaps <- findOverlaps(anchor.withinBoundary, atac.gr)
anchor.withinBoundary <- pintersect(anchor.withinBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
overlaps <- findOverlaps(anchor.outsideBoundary, atac.gr)
anchor.outsideBoundary <- pintersect(anchor.outsideBoundary[queryHits(overlaps)], atac.gr[subjectHits(overlaps)])
anchors <- list(anchor.noBoundary, anchor.withBoundary, anchor.outsideBoundary)
tbs <- list()
temps <- list()
alpha <- 0.05
# Process clusters c1 to c8
for (i in 1:3) {
anchor <- anchors[[i]]
# Run LOLA
result <- runLOLA(anchor, anchor.all, lolaDB)
tb <- as_tibble(result)
# Filter and summarize
tb <- tb %>%
dplyr::mutate(target = toupper(antibody)) %>%
filter(str_to_lower(cellType) == "embryonic stem cell") %>%
dplyr::filter(qValue < alpha) %>%
dplyr::group_by(target) %>%
slice_min(meanRnk, with_ties = FALSE)
# Store tb
tbs[[i]] <- tb
# Select and rename oddsRatio
temp <- tb %>% dplyr::select(target, oddsRatio)
colnames(temp) <- c("target", paste0("OR_c", i))
# Store temp
temps[[i]] <- temp
}
# Merge all temp tables into one
temp <- Reduce(function(x, y) full_join(x, y, by = "target"), temps) %>%
mutate_all(~replace_na(., 1))
colnames(temp) <- c("target", "noBoundary", "withinBoundary", "outsideBoundary")
data <- temp %>% column_to_rownames("target") %>% as.matrix()
library(circlize)
col_fun <- colorRamp2(c(1, max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
tss.5kb.group1 <- fread(here(refDir, "TSS_binaryGroup1.bed")) %>%
dplyr::mutate(start = (V2 + V3)/2 - 2500,
end = (V2 + V3)/2 + 2500,
chr = V1) %>%
dplyr::select(chr, start, end)
tss.5kb.group1 <- makeGRangesFromDataFrame(tss.5kb.group1)
tss.5kb.group2 <- fread(here(refDir, "TSS_binaryGroup2.bed")) %>%
dplyr::mutate(start = (V2 + V3)/2 - 2500,
end = (V2 + V3)/2 + 2500,
chr = V1) %>%
dplyr::select(chr, start, end)
tss.5kb.group2 <- makeGRangesFromDataFrame(tss.5kb.group2)
plotSumScoresBinary <- function(track, peak, name, anchor1, anchor2){
peakTrack <- track[unique(queryHits(findOverlaps(track, peak)))]
a <- getSumScores(peakTrack, anchor1)
b <- getSumScores(peakTrack, anchor2)
a.tb <- tibble(loop = "group1",
sumScore = a)
b.tb <- tibble(loop = "group2",
sumScore = b)
data <- bind_rows(a.tb, b.tb) %>% drop_na()
data$loop <- factor(data$loop, levels = c("group1", "group2"))
p12 <- getPvalWilcox(data, "group1", "group2")
p <- ggplot(data, aes(x = loop, y = sumScore)) +
labs(x = NULL, y = paste0(name, "\nlog(avg peak score per anchor)")) +
geom_violin(aes(fill = loop), color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.1, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.size = 1, outlier.stroke = NA) + theme_classic() +
stat_summary(
aes(group = loop), fun = mean,
geom = "point", shape = 21, size = 1,
fill = "red", color = "black"
) + theme(
axis.title = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +
#coord_cartesian(ylim = c(quantile(data$sumScore, 0.0), quantile(data$sumScore, 0.99))) +
annotate(
"text", x = 1, y = quantile(data$sumScore, 0.5),
label = paste0("p12: ", convPvalue(p12)),
color = "black", hjust = 0, size = 3
)
#+ scale_y_continuous(trans = "log10")
fileName <- paste0("ChIP_peak_avgPeakScore_binaryGroup_", name)
width <- panelSize(2)*mmToInch
height <- panelSize(2)*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
}
# Sum peak score
####
track <- import(here(refDir, "33248_CTCF_07-729_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "CTCF", tss.5kb.group1, tss.5kb.group2)
track <- import(here(refDir, "33250_RAD21_ab992_Bruce-4_trim_q20_dedup_black_depthNorm.bw"), format = "BigWig")
peak <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
plotSumScoresBinary(track, peak, "RAD21", tss.5kb.group1, tss.5kb.group2)
# Create the data
data <- data.frame(
Group = c("Group1", "Group2"),
Value = c(31.03, 10.64)
)
# Create the bar plot
p <- ggplot(data, aes(x = Group, y = Value, fill = Group)) +
geom_bar(stat = "identity", color = "black", width = 0.7, alpha = 0.6, show.legend = FALSE,
linewidth = lineThick * mmToLineUnit, lineend = "square") + # Use identity for raw values
labs(y = "% of TSS on stripe") +
theme_classic() +
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("perc_on_stripe_binaryGroup")
width <- panelSize(1.25)*mmToInch
height <- panelSize(1.2)*mmToInch
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
###[3.39] Splitting bedpe
temp <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>% dplyr::select(c(1, 2))
geneList <- (temp %>% dplyr::filter(external_gene_name %in% c("Klf4", "Tbx3", "Jun", "Fosl2", "Myc", "Phlda1")))$ensembl_gene_id
# LOAD ANNOTATED LOOP
name <- "chromo_cons_annoHierarchy"
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1)
filteredLoop <- geneAnnoData %>% unnest(gene) %>% dplyr::filter(gene %in% geneList)
fwrite(filteredLoop, here(consensusDir, paste0("P-N_selectedGene.bedpe")),
sep = "\t", col.names = FALSE)
temploop <- filteredLoop %>% dplyr::filter(Anno2 %in% c("P-P", "P-E"))
fwrite(temploop, here(consensusDir, paste0("P-PE_selectedGene.bedpe")),
sep = "\t", col.names = FALSE)
temploop <- filteredLoop %>% dplyr::filter(Anno2 %in% c("P-S"))
fwrite(temploop, here(consensusDir, paste0("P-S_selectedGene.bedpe")),
sep = "\t", col.names = FALSE)
name <- "chromo_cons_annoHierarchy"
# IMPORT S-S loops
allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
strLoops <- allLoops %>% dplyr::filter(Anno2 %in% c("S-S"))
temp <- strLoops %>% dplyr::select(c(1, 2, 6))
colnames(temp) <- c("chrom", "start", "end")
strLoopsGr <- makeGRangesFromDataFrame(temp)
# IMPORT TSS
flankSize <- 10
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chrom", "start", "end", "ensembl", "gene")
geneGr <- makeGRangesFromDataFrame(gene.tb, keep.extra.columns = TRUE)
# Find overlaps between loops and TSS
overlaps <- findOverlaps(geneGr, strLoopsGr)
# Annotate overlaps
tss_with_loops <- geneGr[queryHits(overlaps)]
loops_with_tss <- strLoopsGr[subjectHits(overlaps)]
# Combine into a data frame for processing
loop_data <- data.frame(gene = tss_with_loops$ensembl,
loop_chr = seqnames(loops_with_tss),
loop_start = start(loops_with_tss),
loop_end = end(loops_with_tss),
loop_width = width(loops_with_tss))
# Identify the largest loop for each gene
largest_loops <- loop_data[order(loop_data$gene, -loop_data$loop_width), ]
largest_loops <- largest_loops[!duplicated(largest_loops$gene), ]
largest_loops <- largest_loops %>% dplyr::mutate(loopID = paste(loop_chr, loop_start, loop_end, sep = "_"))
largest_loops <- largest_loops[order(largest_loops$loop_chr, largest_loops$loop_start),]
rownames(largest_loops) <- NULL
# Checking smaller loops
allLoopsGr <- makeGRangesFromDataFrame(allLoops,
seqnames.field = "chrom1",
start.field = "start1",
end.field = "end2")
largestLoopsGr <- makeGRangesFromDataFrame(largest_loops,
seqnames.field = "loop_chr",
start.field = "loop_start",
end.field = "loop_end")
largestLoopsGr <- sort(unique(largestLoopsGr))
complete_overlaps <- subsetByOverlaps(allLoopsGr, largestLoopsGr, type = "within")
overlaps <- findOverlaps(complete_overlaps, largestLoopsGr)
completeOverlapDf <- as.data.frame(complete_overlaps) %>% dplyr::mutate(loopIDAll = paste(seqnames, start, end, sep = "_"))
largestLoopsDf <- as.data.frame(largestLoopsGr) %>% dplyr::mutate(loopID = paste(seqnames, start, end, sep = "_"))
loopIDpairs <- data.frame(loopID = completeOverlapDf$loopIDAll[queryHits(overlaps)],
loopID2 = largestLoopsDf$loopID[subjectHits(overlaps)])
allLoopsAnnotated <- allLoops %>% dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>% dplyr::left_join(loopIDpairs, by = c("loopID"))
temp <- allLoopsAnnotated %>% dplyr::filter(Anno2 %in% c("P-E", "P-P", "E-E")) %>% dplyr::mutate(hasSS = !is.na(loopID2))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(hasSS ==group1) )$diff_dTAG_DMSO
distance2 <- (data %>% dplyr::filter(hasSS ==group2) )$diff_dTAG_DMSO
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, TRUE, FALSE))
ggplot(temp, aes(x = hasSS, y = diff_dTAG_DMSO)) + geom_violin(aes(fill = hasSS), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = hasSS), fun = mean,
geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
ggtitle("PE-PE") + geom_hline(yintercept = 0)
# Counting loop types
allLoopsAnnotatedFiltered <- allLoopsAnnotated %>% dplyr::filter(!is.na(loopID2)) %>%
dplyr::filter(!(loopID == loopID2))
temp <- allLoopsAnnotatedFiltered %>% dplyr::select(loopID2, Anno2)
result <- temp %>%
group_by(loopID2, Anno2) %>%
summarise(count = n(), .groups = 'drop') %>%
group_by(loopID2) %>%
mutate(frequency = count / sum(count)) %>%
arrange(loopID2, Anno2)
result <- largest_loops %>% dplyr::left_join(result, by = c("loopID" = "loopID2"))
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
data <- tibble(
gene = c(gene.group1, gene.group2),
group = c(rep("group1", length(gene.group1)),
rep("group2", length(gene.group2)))
)
data <- data %>% dplyr::left_join(result, by = c("gene"))
data <- data %>% filter(complete.cases(.))
all_anno2 <- unique(data$Anno2)
# Calculate average frequency of Anno2 for each gene in each group
result <- data %>%
# Ensure all possible Anno2 values are present for each gene and group
complete(gene, Anno2 = all_anno2, fill = list(frequency = 0)) %>%
dplyr::mutate(group = case_when(
gene %in% gene.group1 ~ "group1",
gene %in% gene.group2 ~ "group2",
TRUE ~ NA
)) %>%
dplyr::select(group, Anno2, frequency)
ggplot(result, aes(x = Anno2, fill = group, y = frequency)) + geom_boxplot()
# View results
ggplot(largest_loops, aes(x = loop_width)) + geom_histogram()
### Checking how many genes from each group has encompassing S-S
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
data <- tibble(
gene = c(gene.group1, gene.group2),
group = c(rep("group1", length(gene.group1)),
rep("group2", length(gene.group2)))
)
data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
hasSS = ifelse(is.na(loop_width), "NO", "YES")
)
data_summary <- data %>%
group_by(group) %>%
summarize(
total = n(),
hasSS_yes = sum(hasSS == "YES"),
percentage_yes = (hasSS_yes / total) * 100
)
ggplot(data_summary, aes(x = group, y = percentage_yes, fill = group)) +
geom_bar(stat = "identity", color = "black") +
labs(
title = "Percentage of Each Group with hasSS = YES",
x = "Group",
y = "Percentage (%)"
) + ylim(0, 100) +
theme_minimal()
temp <- data %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$loop_width
distance2 <- (data %>% dplyr::filter(group ==group2) )$loop_width
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, "group1", "group2"))
ggplot(temp, aes(x = group, y = loop_width)) + geom_violin() + geom_boxplot(width = 0.5) +
annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black")
### Comparing RNA perturbation
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(c(1, 4, 5)) %>%
dplyr::mutate(hasSS = ifelse(ensembl_gene_id %in% largest_loops$gene, "hasSS", "noSS"))
ggplot(diff.RNA, aes(x = abs(log2FoldChange), color = hasSS)) +
stat_ecdf(size = 0.4) +
labs(
x = "Absolute log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
### Checking ChIP peak density
largest_loopsGr <- makeGRangesFromDataFrame(largest_loops,
seqnames.field = "loop_chr",
start.field = "loop_start",
end.field = "loop_end",
keep.extra.columns = TRUE)
# Find overlaps between ChIP-seq peaks and the largest loops
chip_overlaps <- findOverlaps(largest_loopsGr, peak.CTCF)
# Count the number of ChIP-seq peaks per loop
loop_peak_counts <- table(queryHits(chip_overlaps))
# Create a data frame with the counts and loop details
largest_loops$density <- 0 # Initialize density column
largest_loops$count <- 0 # Initialize peak count column
# Add peak counts to the corresponding loops
largest_loops[as.numeric(names(loop_peak_counts)), "count"] <- as.integer(loop_peak_counts)
# Calculate the density (peaks per kilobase)
largest_loops$density <- largest_loops$count / ((largest_loops$loop_width-1) / 1000)
data <- tibble(
gene = c(gene.group1, gene.group2),
group = c(rep("group1", length(gene.group1)),
rep("group2", length(gene.group2)))
)
data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
hasSS = ifelse(is.na(loop_width), "NO", "YES")
) %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$density
distance2 <- (data %>% dplyr::filter(group ==group2) )$density
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(data, "group1", "group2"))
ggplot(data, aes(x = group, y = density)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean,
geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
ggtitle("CTCF")
name <- "chromo_cons_annoHierarchy"
# IMPORT S-S LOOPS
allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv"))
strLoops <- allLoops %>% dplyr::filter(Anno2 %in% c("S-S"))
temp <- strLoops %>% dplyr::select(c(1, 2, 3, 5, 6)) # Assuming columns include anchor positions
colnames(temp) <- c("chrom", "start1", "end1", "start2", "end2")
strLoopsGr1 <- makeGRangesFromDataFrame(
temp %>% dplyr::select(chrom, start = start1, end = end1)
)
strLoopsGr2 <- makeGRangesFromDataFrame(
temp %>% dplyr::select(chrom, start = start2, end = end2)
)
# IMPORT CTCF MOTIF ANNOTATION
ctcfMotifs <- fread(here(refDir, "33248_CTCF_07-729_Bruce-4_peaks.mergePeak.motifAnnotated.bed"))
colnames(ctcfMotifs) <- c("chrom", "start", "end", "motif")
ctcfGr <- makeGRangesFromDataFrame(ctcfMotifs, keep.extra.columns = TRUE)
# Annotate each loop anchor with CTCF motifs
anchor1_with_ctcf <- findOverlaps(strLoopsGr1, ctcfGr)
anchor2_with_ctcf <- findOverlaps(strLoopsGr2, ctcfGr)
# Get motif information for anchor1 overlaps
anchor1_ctcf_motif <- rep(NA, length(strLoopsGr1)) # Initialize motif vector
anchor1_ctcf_motif[queryHits(anchor1_with_ctcf)] <- ctcfGr[subjectHits(anchor1_with_ctcf)]$motif
# Get motif information for anchor2 overlaps
anchor2_ctcf_motif <- rep(NA, length(strLoopsGr2)) # Initialize motif vector
anchor2_ctcf_motif[queryHits(anchor2_with_ctcf)] <- ctcfGr[subjectHits(anchor2_with_ctcf)]$motif
# Add motif information to strLoops
strLoops <- strLoops %>%
mutate(
anchor1_motif = anchor1_ctcf_motif,
anchor2_motif = anchor2_ctcf_motif
)
# Filter for convergent CTCF interactions (forward in anchor1, reverse in anchor2)
convergent_loops <- strLoops %>%
filter(anchor1_motif == "fwd" & anchor2_motif == "rev") %>% dplyr::filter(Anno2 %in% c("S-S"))
# Extract loop boundary from left to right
temp <- convergent_loops %>% dplyr::select(c(1, 2, 6))
colnames(temp) <- c("chrom", "start", "end")
strLoopsGr <- makeGRangesFromDataFrame(temp)
# IMPORT TSS
flankSize <- 10
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V1, TSSstart, TSSend, V6, V5)
colnames(gene.tb) <- c("chrom", "start", "end", "ensembl", "gene")
geneGr <- makeGRangesFromDataFrame(gene.tb, keep.extra.columns = TRUE)
# Find overlaps between loops and TSS
overlaps <- findOverlaps(geneGr, strLoopsGr)
# Annotate overlaps
tss_with_loops <- geneGr[queryHits(overlaps)]
loops_with_tss <- strLoopsGr[subjectHits(overlaps)]
# Combine into a data frame for processing
loop_data <- data.frame(gene = tss_with_loops$ensembl,
loop_chr = seqnames(loops_with_tss),
loop_start = start(loops_with_tss),
loop_end = end(loops_with_tss),
loop_width = width(loops_with_tss))
# Identify the largest loop for each gene
largest_loops <- loop_data[order(loop_data$gene, -loop_data$loop_width), ]
largest_loops <- largest_loops[!duplicated(largest_loops$gene), ]
largest_loops <- largest_loops %>% dplyr::mutate(loopID = paste(loop_chr, loop_start, loop_end, sep = "_"))
largest_loops <- largest_loops[order(largest_loops$loop_chr, largest_loops$loop_start),]
rownames(largest_loops) <- NULL
# Checking smaller loops
allLoopsGr <- makeGRangesFromDataFrame(allLoops,
seqnames.field = "chrom1",
start.field = "start1",
end.field = "end2")
largestLoopsGr <- makeGRangesFromDataFrame(largest_loops,
seqnames.field = "loop_chr",
start.field = "loop_start",
end.field = "loop_end")
largestLoopsGr <- sort(unique(largestLoopsGr))
complete_overlaps <- subsetByOverlaps(allLoopsGr, largestLoopsGr, type = "within")
overlaps <- findOverlaps(complete_overlaps, largestLoopsGr)
completeOverlapDf <- as.data.frame(complete_overlaps) %>% dplyr::mutate(loopIDAll = paste(seqnames, start, end, sep = "_"))
largestLoopsDf <- as.data.frame(largestLoopsGr) %>% dplyr::mutate(loopID = paste(seqnames, start, end, sep = "_"))
loopIDpairs <- data.frame(loopID = completeOverlapDf$loopIDAll[queryHits(overlaps)],
loopID2 = largestLoopsDf$loopID[subjectHits(overlaps)])
# allLoopsAnnotated <- allLoops %>% dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>% dplyr::left_join(loopIDpairs, by = c("loopID"))
##################################
diffCutoff <- 0.2
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = diffCutoff,
annoList = c("P-P", "P-E", "P-S", "P-X")) %>% unnest(gene) %>%
dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_"))
temp <- geneAnnoData %>% dplyr::left_join(loopIDpairs, by = c("loopID"))
temp2 <- temp %>% dplyr::select(gene, loopID2) %>%
group_by(gene) %>% # Group data by the 'gene' column
summarize(geneHasSS = !all(is.na(loopID2))) # Check if all loopID2 values for each gene are NA
temp <- temp %>% left_join(temp2, by = c("gene"))
data.all <- temp %>% dplyr::mutate(group = ifelse(!geneHasSS, "No",
ifelse(is.na(loopID2), "Outside", "Within")),
size = start2 - start1)
##################################
outfile <- data.all %>% dplyr::select(loopID2, gene) %>% dplyr::filter(!is.na(loopID2)) %>%
separate(loopID2, into = c("chrom", "start", "end"), sep = "_", convert = TRUE)
# fwrite(outfile, here(consensusDir, "convergent_ss_domain.bed"), sep = "\t", col.names = FALSE)
data <- data.all %>% dplyr::filter(Anno2 %in% c("P-E"))
temp <- data %>% dplyr::select(chrom1, start1, end1, chrom2, start2, end2, group) %>% distinct()
temp1 <- temp %>% dplyr::filter(group == "No") %>% dplyr::select(-group)
fwrite(temp1, here(consensusDir, "insulated_domain_ss_no.bedpe"), sep = "\t", col.names = FALSE)
temp2 <- temp %>% dplyr::filter(group == "Within") %>% dplyr::select(-group)
fwrite(temp2, here(consensusDir, "insulated_domain_ss_within.bedpe"), sep = "\t", col.names = FALSE)
temp3 <- temp %>% dplyr::filter(group == "Outside") %>% dplyr::select(-group)
fwrite(temp3, here(consensusDir, "insulated_domain_ss_outside.bedpe"), sep = "\t", col.names = FALSE)
temp <- data %>% dplyr::select(id, group, size) %>% distinct()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$size
distance2 <- (data %>% dplyr::filter(group ==group2) )$size
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))
p <- ggplot(temp, aes(x = group, y = size, fill = group)) +
geom_violin(linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() + scale_y_continuous(labels = label_kb_mb) +
scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "Loop size")
fileName <- paste0("insulationBoundary_size_convSS_PE")
width <-33*mmToInch
height <- 35*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$diff_dTAG_DMSO
distance2 <- (data %>% dplyr::filter(group ==group2) )$diff_dTAG_DMSO
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
temp <- data %>% dplyr::select(group, id, diff_dTAG_DMSO) %>% distinct()
convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))
p <- ggplot(data, aes(x = group, y = diff_dTAG_DMSO, fill = group)) +
geom_violin(linewidth = lineMedium * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "Δ loop score") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_hline(yintercept = -0.2,
alpha = 0.5,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square", linetype = "dashed") +
coord_cartesian(ylim = c(-0.8, 0.5))
fileName <- paste0("insulationBoundary_delta_convSS_pe")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
######################
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
data2 <- data %>% left_join(obsexp, by = c("id"))
temp <- data2 %>% dplyr::select(group, id, log_obsexp_diff_dTAG_DMSO) %>% distinct()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$log_obsexp_diff_dTAG_DMSO
distance2 <- (data %>% dplyr::filter(group ==group2) )$log_obsexp_diff_dTAG_DMSO
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
convPvalue(getPvalWilcox(temp, "No", "Within"))
convPvalue(getPvalWilcox(temp, "No", "Outside"))
convPvalue(getPvalWilcox(temp, "Outside", "Within"))
p <- ggplot(temp, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) +
geom_violin(linewidth = lineMedium* mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
scale_fill_manual(values = c("#777777", "#F28E2C", "#F28E2C")) +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "log2(fc of obs/exp)") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
coord_cartesian(ylim = c(-2, 2))
width <-30*mmToInch
height <- 35*mmToInch
fileName <- paste0("insulationBoundary_obsexp_convSS_pe")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
#####################
data <- data.all
gene.withinBoundary <- unique((data %>% dplyr::filter(group == "Within"))$gene)
gene.outsideBoundary <- unique((data %>% dplyr::filter(group == "Outside"))$gene)
gene.noBoundary <- unique((data %>% dplyr::filter(group == "No"))$gene)
gene.wBoundary <- unique(c(gene.withinBoundary, gene.outsideBoundary))
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
print("perc of gene with loop within Boundary")
sum(gene.group1 %in% gene.withinBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.withinBoundary)/length(gene.group2)*100
print("perc of gene with loop outside Boundary")
sum(gene.group1 %in% gene.outsideBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.outsideBoundary)/length(gene.group2)*100
print("perc of gene with loop with no Boundary")
sum(gene.group1 %in% gene.noBoundary)/length(gene.group1)*100
sum(gene.group2 %in% gene.noBoundary)/length(gene.group2)*100
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name) %>%
dplyr::mutate(group = case_when(
ensembl_gene_id %in% gene.wBoundary ~ "withinBoundary",
ensembl_gene_id %in% gene.noBoundary ~ "noBoundary",
TRUE ~ NA
)) %>%
dplyr::filter(!is.na(group)) %>%
dplyr::mutate(absLog2FC = abs(log2FoldChange))
ks_result <- ks.test(
diff.RNA %>% dplyr::filter(group == "withinBoundary") %>% pull(absLog2FC),
diff.RNA %>% dplyr::filter(group == "noBoundary") %>% pull(absLog2FC)
)
ggplot(diff.RNA, aes(x = group, y = log2FoldChange)) + geom_violin() + geom_boxplot(width = 0.1)
# getPvalWilcox <- function(data, group1, group2){
# distance1 <- (data %>% dplyr::filter(group ==group1) )$absLog2FC
# distance2 <- (data %>% dplyr::filter(group ==group2) )$absLog2FC
# wil <- wilcox.test(distance1, distance2)
# return(wil$p.value)
# }
# convPvalue(getPvalWilcox(diff.RNA, "withBoundary", "noBoundary"))
#
# Create the CDF plot
p <- ggplot(diff.RNA, aes(x = absLog2FC, color = group)) +
scale_color_manual(values = (c("#777777", "#F28E2C"))) +
stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
labs(
x = "Abs. log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.position = "none",
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
fileName <- paste0("log2FC_cdf_insulationBoundary_convSS_pe")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
temp <- allLoopsAnnotated %>% dplyr::filter(Anno2 %in% c("P-E")) %>% dplyr::mutate(hasSS = !is.na(loopID2))
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(hasSS ==group1) )$diff_dTAG_DMSO
distance2 <- (data %>% dplyr::filter(hasSS ==group2) )$diff_dTAG_DMSO
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, TRUE, FALSE))
ggplot(temp, aes(x = hasSS, y = diff_dTAG_DMSO)) + geom_violin(aes(fill = hasSS), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = hasSS), fun = mean,
geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
ggtitle("PE") + geom_hline(yintercept = 0)
# Counting loop types
allLoopsAnnotatedFiltered <- allLoopsAnnotated %>% dplyr::filter(!is.na(loopID2)) %>%
dplyr::filter(!(loopID == loopID2))
temp <- allLoopsAnnotatedFiltered %>% dplyr::select(loopID2, Anno2)
result <- temp %>%
group_by(loopID2, Anno2) %>%
summarise(count = n(), .groups = 'drop') %>%
group_by(loopID2) %>%
mutate(frequency = count / sum(count)) %>%
arrange(loopID2, Anno2)
result <- largest_loops %>% dplyr::left_join(result, by = c("loopID" = "loopID2"))
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
data <- tibble(
gene = c(gene.group1, gene.group2),
group = c(rep("group1", length(gene.group1)),
rep("group2", length(gene.group2)))
)
data <- data %>% dplyr::left_join(result, by = c("gene"))
data <- data %>% filter(complete.cases(.))
all_anno2 <- unique(data$Anno2)
# Calculate average frequency of Anno2 for each gene in each group
result <- data %>%
# Ensure all possible Anno2 values are present for each gene and group
complete(gene, Anno2 = all_anno2, fill = list(frequency = 0)) %>%
dplyr::mutate(group = case_when(
gene %in% gene.group1 ~ "group1",
gene %in% gene.group2 ~ "group2",
TRUE ~ NA
)) %>%
dplyr::select(group, Anno2, frequency)
ggplot(result, aes(x = Anno2, fill = group, y = frequency)) + geom_boxplot()
# View results
ggplot(largest_loops, aes(x = loop_width)) + geom_histogram()
### Checking how many genes from each group has encompassing S-S
gene.group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
gene.group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
data <- tibble(
gene = c(gene.group1, gene.group2),
group = c(rep("group1", length(gene.group1)),
rep("group2", length(gene.group2)))
)
data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
hasSS = ifelse(is.na(loop_width), "NO", "YES")
)
data_summary <- data %>%
group_by(group) %>%
summarize(
total = n(),
hasSS_yes = sum(hasSS == "YES"),
percentage_yes = (hasSS_yes / total) * 100
)
ggplot(data_summary, aes(x = group, y = percentage_yes, fill = group)) +
geom_bar(stat = "identity", color = "black") +
labs(
title = "Percentage of Each Group with hasSS = YES",
x = "Group",
y = "Percentage (%)"
) + ylim(0, 100) +
theme_minimal()
temp <- data %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$loop_width
distance2 <- (data %>% dplyr::filter(group ==group2) )$loop_width
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(temp, "group1", "group2"))
ggplot(temp, aes(x = group, y = loop_width)) + geom_violin() + geom_boxplot(width = 0.5) +
annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black")
### Comparing RNA perturbation
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(c(1, 4, 5)) %>%
dplyr::mutate(hasSS = ifelse(ensembl_gene_id %in% largest_loops$gene, "hasSS", "noSS"))
ggplot(diff.RNA, aes(x = abs(log2FoldChange), color = hasSS)) +
stat_ecdf(size = 0.4) +
labs(
x = "Absolute log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)
### Checking ChIP peak density
largest_loopsGr <- makeGRangesFromDataFrame(largest_loops,
seqnames.field = "loop_chr",
start.field = "loop_start",
end.field = "loop_end",
keep.extra.columns = TRUE)
# Find overlaps between ChIP-seq peaks and the largest loops
chip_overlaps <- findOverlaps(largest_loopsGr, peak.H3K4me3)
# Count the number of ChIP-seq peaks per loop
loop_peak_counts <- table(queryHits(chip_overlaps))
# Create a data frame with the counts and loop details
largest_loops$density <- 0 # Initialize density column
largest_loops$count <- 0 # Initialize peak count column
# Add peak counts to the corresponding loops
largest_loops[as.numeric(names(loop_peak_counts)), "count"] <- as.integer(loop_peak_counts)
# Calculate the density (peaks per kilobase)
largest_loops$density <- largest_loops$count / ((largest_loops$loop_width-1) / 1000)
data <- tibble(
gene = c(gene.group1, gene.group2),
group = c(rep("group1", length(gene.group1)),
rep("group2", length(gene.group2)))
)
data <- data %>% dplyr::left_join(largest_loops, by = c("gene"))
data <- data %>% dplyr::mutate(
hasSS = ifelse(is.na(loop_width), "NO", "YES")
) %>% dplyr::filter(hasSS == "YES")
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(group ==group1) )$density
distance2 <- (data %>% dplyr::filter(group ==group2) )$density
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
pv <- convPvalue(getPvalWilcox(data, "group1", "group2"))
ggplot(data, aes(x = group, y = density)) + geom_violin(aes(fill = group), show.legend = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(aes(group = group), fun = mean,
geom = "point", shape = 21, size = 2, fill = "red", color = "black") +
theme_classic() +
annotate("text", x = 1, y = 0, label = pv, size = 5, color = "black") +
ggtitle("H3K4me3")
name <- "chromo_cons_annoHierarchy"
# UP loop
loop.up <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_UP_diff0.2.bedpe")) %>%
dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))
# NO loop
loop.no <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_NO_diff0.2.bedpe")) %>%
dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))
# UP NO
loop.upno <- bind_rows(loop.up, loop.no)
# DOWN loop
loop.down <- fread(here(consensusDir, "chromo_cons_annoHierarchy_pe-pe_dTAGvsDMSO_DOWN_diff0.2.bedpe")) %>%
dplyr::mutate(loopID = paste(V1, V2, V6, sep = "_"))
allLoops <- fread(here(consensusDir, "chromo_cons_annoHierarchy.tsv")) %>%
dplyr::mutate(loopID = paste(chrom1, start1, end2, sep = "_")) %>%
dplyr::mutate(group = case_when(
loopID %in% loop.upno$loopID ~ "UP&NO",
loopID %in% loop.down$loopID ~ "DOWN",
TRUE ~ NA
)) %>%
dplyr::filter(!is.na(group))
minValue <- -4
obsexp <- fread(here(consensusDir, paste0("loopScore_cons_obsexp.tsv"))) %>%
dplyr::mutate(log_obsexp_DMSO = if_else(obsexp_DMSO == 0, minValue, log2(obsexp_DMSO)),
log_obsexp_dTAG = if_else(obsexp_dTAG == 0, minValue, log2(obsexp_dTAG)),
log_obsexp_A485 = if_else(obsexp_A485 == 0, minValue, log2(obsexp_A485)),
log_obsexp_diff_dTAG_DMSO = log_obsexp_dTAG - log_obsexp_DMSO,
log_obsexp_diff_A485_DMSO = log_obsexp_A485 - log_obsexp_DMSO)
allLoops <- allLoops %>% left_join(obsexp, by = c("id"))
allLoops$group <- factor(allLoops$group, levels = c("UP&NO", "DOWN"))
### Plotting
p1 <- ggplot(allLoops, aes(x = group, y = diff_A485_DMSO, fill = group)) +
geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
scale_fill_manual(values = c("grey50", "grey50")) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "Δ loop score") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_hline(yintercept = -0.2,
alpha = 0.5,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square", linetype = "dashed") +
coord_cartesian(ylim = c(-0.5, 0.5))
####
p2 <- ggplot(allLoops, aes(x = group, y = log_obsexp_diff_A485_DMSO, fill = group)) +
geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
scale_fill_manual(values = c("grey50", "grey50")) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "log2(fc of obs/exp)") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
coord_cartesian(ylim = c(-2, 2))
#############
### Plotting
p3 <- ggplot(allLoops, aes(x = group, y = diff_dTAG_DMSO, fill = group)) +
geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
scale_fill_manual(values = c("grey50", "grey50")) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "Δ loop score") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
geom_hline(yintercept = -0.2,
alpha = 0.5,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square", linetype = "dashed") +
coord_cartesian(ylim = c(-0.5, 0.5))
####
p4 <- ggplot(allLoops, aes(x = group, y = log_obsexp_diff_dTAG_DMSO, fill = group)) +
geom_violin(linewidth = lineThick * mmToLineUnit, lineend = "square", alpha = .4, , show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineThick * mmToLineUnit, lineend = "square",
outlier.shape = NA, alpha = 0.6, show.legend = FALSE) +
theme_classic() +
stat_summary(
aes(group = group), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black", position = position_dodge(.3)
) +
scale_fill_manual(values = c("grey50", "grey50")) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.text.x = element_text(
size = fontSizeM,
angle = 45, # Rotate x-axis labels 45 degrees
hjust = 1, # Adjust horizontal justification
vjust = 1 # Adjust vertical justification
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +labs(y = "log2(fc of obs/exp)") +
geom_hline(yintercept = 0,
alpha = 1,
color = "black",
size = lineThick*mmToLineUnit,
lineend = "square") +
coord_cartesian(ylim = c(-2, 2))
width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_dTAG")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p1, p2))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(p1, p2))
dev.off()
width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_A485")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p3, p4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(p3, p4))
dev.off()
width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_loopScore")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p1, p3))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(p1, p3))
dev.off()
width <- panelSize(2)*mmToInch
height <- panelSize(1.5)*mmToInch
fileName <- paste0("regLoopScorefromdTAG_log2fc")
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(plot_grid(p2, p4))
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(plot_grid(p2, p4))
dev.off()
peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
##########################################################
flankSize <- 5000
gene.tb <- fread(here(refDir, "mm10_GRCm38.p6_gene_sorted.bed")) %>%
dplyr::mutate(TSS = ifelse(V4 == "+", V2, V3),
TSSstart = TSS - flankSize,
TSSend = TSS + flankSize) %>%
dplyr::select(V6, V5, V1, TSSstart, TSSend)
colnames(gene.tb) <- c("ensembl_gene_id", "external_gene_name", "chr", "start", "end")
# Convert gene.tb to a GRanges object
gene_gr <- GRanges(
seqnames = gene.tb$chr,
ranges = IRanges(start = gene.tb$start, end = gene.tb$end),
gene_id = gene.tb$ensembl_gene_id
)
# Count overlaps between peaks and genes
overlap_counts <- countOverlaps(gene_gr, peak.RAD21)
# Add overlap counts to the original gene.tb data
gene.tb$peak_count <- overlap_counts
###############################################################################
peakNum0 <- (gene.tb %>% dplyr::filter(peak_count == 0))$ensembl_gene_id
peakNum1 <- (gene.tb %>% dplyr::filter(peak_count == 1))$ensembl_gene_id
peakNum2 <- (gene.tb %>% dplyr::filter(peak_count == 2))$ensembl_gene_id
peakNum3 <- (gene.tb %>% dplyr::filter(peak_count == 3))$ensembl_gene_id
peakNumOver4 <- (gene.tb %>% dplyr::filter(peak_count >= 4))$ensembl_gene_id
###############################################################################
name <- "chromo_cons_annoHierarchy"
alpha <- 0.05
fcCutoff <- 0.5
diff.RNA.G1.dTAG <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha, abs(shrinked_log2FC) > fcCutoff)
diff.RNA.G1.dTAG.noFCcutoff <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::filter(padj < alpha)
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-P", "P-E", "P-S", "P-X"))%>%
dplyr::mutate(distance = start2 - start1,
peakID = paste(chrom1, start1, start2, sep = "_"))
## Dividing genes into groups
temp <- geneAnnoData %>% dplyr::select(diff_dTAG_DMSO, distance, gene) %>%
unnest(gene) %>% group_by(gene) %>%
summarize(mean_diff_score = mean(diff_dTAG_DMSO),
mean_distance = mean(distance),
.groups = 'drop')
diff.RNA <- fread(here(refDir, "diff_G1.dTAG_G1.2i.dTAG_vs_G1.2i.DMSO.tsv")) %>%
dplyr::select(ensembl_gene_id, log2FoldChange, shrinked_log2FC, padj, external_gene_name)
maxLog2FC = 2
temp <- left_join(temp, diff.RNA, by = c("gene" = "ensembl_gene_id")) %>%
dplyr::mutate(flag = ifelse(gene %in% diff.RNA.G1.dTAG$ensembl_gene_id, "2DOWN", "0NO"),
maxFlag = (abs(shrinked_log2FC) > maxLog2FC),
log2fcMax = pmax(pmin(shrinked_log2FC, maxLog2FC), -maxLog2FC)) %>%
dplyr::arrange(flag) %>%
drop_na(shrinked_log2FC)
temp <- temp %>% dplyr::mutate(
pnOver = ifelse(gene %in% peakNum0, "peakNum0",
ifelse(gene %in% peakNum1, "peakNum1",
ifelse(gene %in% peakNum2, "peakNum2",
ifelse(gene %in% peakNum3, "peakNum3",
ifelse(gene %in% peakNumOver4, "peakNumOver4", NA)))))) %>%
drop_na(pnOver)
###############################################################################
# Checking percentage of genes with RAD21 peaks
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
aaa <- temp %>% dplyr::mutate(group = case_when(
gene %in% group1 ~ "group1",
gene %in% group2 ~ "group2",
TRUE ~ NA
)) %>%
dplyr::select(group, pnOver, gene) %>%
dplyr::filter(group %in% c("group1", "group2")) %>%
dplyr::mutate(hasPeak = ifelse(pnOver == "peakNum0", FALSE, TRUE))
bbb <- aaa %>% dplyr::filter(group == "group1")
sum(bbb$hasPeak)
bbb <- aaa %>% dplyr::filter(group == "group2")
sum(bbb$hasPeak)
aaa_summary <- aaa %>%
group_by(group, pnOver) %>%
summarize(count = n(), .groups = "drop")
# Create the stacked barplot
aaa_summary <- aaa %>%
group_by(group, pnOver) %>%
summarize(count = n(), .groups = "drop") %>%
group_by(group) %>%
mutate(ratio = count / sum(count))
# Define the gradient colors
gradient_colors <- c("#D4D4D4", "#DCB0AF", "#E48D8A", "#EC6965", "#F44641")
# gradient_colors <- c("#D4D4D4", "#A2BBCA", "#71A2C0", "#3F89B7", "#0E71AD")
# Create the stacked barplot with gradient colors
p <- ggplot(aaa_summary, aes(x = group, y = ratio, fill = pnOver)) +
geom_bar(stat = "identity", position = "stack",
linewidth = lineMedium * mmToLineUnit, lineend = "square") +
theme_classic() + labs(x = NULL , y = "Ratio") +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +
scale_fill_manual(values = gradient_colors,
guide = guide_legend(
override.aes = list(size = 0.5), # Adjust legend key symbol size
keywidth = 2 / 2.54, # Convert 2mm to cm
keyheight = 2 / 2.54 # Convert 2mm to cm
))
width <- panelSize(1.7)*mmToInch
height <- panelSize(1.3)*mmToInch
fileName <- paste0("rad21perc_promoter")
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
getPvalWilcox <- function(data, group1, group2){
distance1 <- (data %>% dplyr::filter(pnOver ==group1) )$mean_diff_score
distance2 <- (data %>% dplyr::filter(pnOver ==group2) )$mean_diff_score
wil <- wilcox.test(distance1, distance2)
return(wil$p.value)
}
ps01 <- round(getPvalWilcox(temp, "peakNum0", "peakNum1"), 5)
ps12 <- round(getPvalWilcox(temp, "peakNum1", "peakNum2"), 5)
ps23 <- round(getPvalWilcox(temp, "peakNum2", "peakNum3"), 5)
ps34 <- round(getPvalWilcox(temp, "peakNum3", "peakNumOver4"), 5)
ps24 <- round(getPvalWilcox(temp, "peakNum2", "peakNumOver4"), 5)
ps14 <- round(getPvalWilcox(temp, "peakNum1", "peakNumOver4"), 5)
ps04 <- round(getPvalWilcox(temp, "peakNum0", "peakNumOver4"), 5)
p <- ggplot(temp, aes(x = pnOver, y = mean_diff_score)) +
geom_violin(aes(fill = pnOver),
color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
show.legend = FALSE) +
geom_boxplot(width = 0.3, color = "black",
linewidth = lineMedium * mmToLineUnit, lineend = "square",
outlier.shape = NA
) + theme_classic() + labs(x = NULL , y = "Average Δ loop score") +
stat_summary(
aes(group = pnOver), fun = mean,
geom = "point", shape = 21, size = 0.5,
fill = "black", color = "black"
) +
geom_hline(yintercept = 0, linewidth = lineThick * mmToLineUnit) +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.text.x = element_text(
angle = 45, hjust = 1, vjust = 1
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
)+
annotate("text", x = 1, y = 0, label = paste0("ps01: ", convPvalue(ps01), "\n",
"ps12: ", convPvalue(ps12), "\n",
"ps23: ", convPvalue(ps23), "\n",
"ps34: ", convPvalue(ps34), "\n",
"ps24: ", convPvalue(ps24), "\n",
"ps14: ", convPvalue(ps14), "\n",
"ps04: ", convPvalue(ps04), "\n"),
color = "black", hjust = 0, size = 2) +
scale_fill_manual(values = c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C")) +
coord_cartesian(ylim = c(-0.5, 0.1))
fileName <- paste0("diffScore_barplot_RAD21peakWithin10kb_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-38*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
temp <- temp %>% dplyr::mutate(absLog2FC = abs(log2FoldChange))
p <- ggplot(temp, aes(x = absLog2FC, color = pnOver)) +
scale_color_manual(values = (c("#777777", "#8B7E65", "#A28452", "#C2884D", "#F28E2C"))) +
stat_ecdf(size = 0.4, linewidth = lineMedium * mmToLineUnit, lineend = "square" ) + # Use stat_ecdf to plot the empirical CDF
labs(
x = "Abs. log2(fold change)",
y = "Cumulative Probability"
) + coord_cartesian(xlim = c(0, 1.5)) +
theme_classic() + # Clean theme
theme(
axis.title = element_text(
size = fontSizeM,
family = fontType,
color = "#000000"
),
axis.text = element_text(
size = fontSizeS,
family = fontType,
color = "#000000"
),
axis.line = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
axis.ticks = element_line(
color = "#000000",
size = lineThick*mmToLineUnit,
lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.position = "none",
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) + scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
fileName <- paste0("log2FC_cdf_psGroup_dTAG_vs_DMSO")
width <- 33*mmToInch
height <-33*mmToInch
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
peak.RAD21 <- importPeak(here(refDir, "33250_RAD21_ab992_Bruce-4_peaks.mergePeak.bed"))
group1 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup1.tsv"))$gene
group2 <- fread(here(refDir, "geneList_dTAG_vs_DMSO_RNA_loop_binaryGroup2.tsv"))$gene
##########################################################
name <- "chromo_cons_annoHierarchy"
geneAnnoData <- loadLoopAnnoData(here(consensusDir, paste0(name, "_p-n_ensemblList.tsv")),
diffCutoff = 0.2,
annoList = c("P-E"))
temp1 <- geneAnnoData %>% dplyr::filter(A1 == "E") %>% dplyr::select(chrom1, start1, end1, gene)
colnames(temp1) <- c("chr", "start", "end", "gene")
temp2 <- geneAnnoData %>% dplyr::filter(A2 == "E") %>% dplyr::select(chrom2, start2, end2, gene)
colnames(temp2) <- c("chr", "start", "end", "gene")
enhAnchors <- bind_rows(temp1, temp2) %>% unnest(gene) %>%
dplyr::mutate(
group = case_when(
gene %in% group1 ~ "Grp1",
gene %in% group2 ~ "Grp2",
TRUE ~ NA
)
) %>% dplyr::filter(!is.na(group))
enh <- makeGRangesFromDataFrame(enhAnchors %>% dplyr::select(c(1, 2, 3)))
# Count overlaps between peaks and genes
overlap_counts <- countOverlaps(enh, peak.RAD21)
# Add overlap counts to the original gene.tb data
enhAnchors$peak_count <- overlap_counts
enhAnchors <- enhAnchors %>% dplyr::mutate(
peakGroup = case_when(
peak_count == 0 ~ "peakNum0",
peak_count == 1 ~ "peakNum1",
peak_count == 2 ~ "peakNum2",
peak_count == 3 ~ "peakNum3",
peak_count >= 4 ~ "peakNumOver4"
)
)
aaa <- enhAnchors %>%
dplyr::select(group, peakGroup, gene) %>%
dplyr::mutate(hasPeak = ifelse(peakGroup == "peakNum0", FALSE, TRUE))
bbb <- aaa %>% dplyr::filter(group == "Grp1")
sum(bbb$hasPeak)/nrow(bbb)
bbb <- aaa %>% dplyr::filter(group == "Grp2")
sum(bbb$hasPeak)/nrow(bbb)
aaa_summary <- aaa %>%
group_by(group, peakGroup) %>%
summarize(count = n(), .groups = "drop")
# Create the stacked barplot
aaa_summary <- aaa %>%
group_by(group, peakGroup) %>%
summarize(count = n(), .groups = "drop") %>%
group_by(group) %>%
mutate(ratio = count / sum(count))
# Define the gradient colors
# gradient_colors <- c("#D4D4D4", "#DCB0AF", "#E48D8A", "#EC6965", "#F44641")
gradient_colors <- c("#D4D4D4", "#A2BBCA", "#71A2C0", "#3F89B7", "#0E71AD")
# Create the stacked barplot with gradient colors
p <- ggplot(aaa_summary, aes(x = group, y = ratio, fill = peakGroup)) +
geom_bar(stat = "identity", position = "stack",
linewidth = lineMedium * mmToLineUnit, lineend = "square") +
theme_classic() + labs(x = NULL , y = "Ratio") +
theme(
axis.title = element_text(
size = fontSizeM, family = fontType, color = "#000000"
),
axis.text = element_text(
size = fontSizeS, family = fontType, color = "#000000"
),
axis.line = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
axis.ticks = element_line(
color = "#000000", size = lineThick * mmToLineUnit, lineend = "square"
),
panel.background = element_rect(fill = "transparent"),
legend.text = element_text(family = fontType, size = fontSizeS),
legend.title = element_text(family = fontType, size = fontSizeS)
) +
scale_fill_manual(values = gradient_colors,
guide = guide_legend(
override.aes = list(size = 0.5), # Adjust legend key symbol size
keywidth = 2 / 2.54, # Convert 2mm to cm
keyheight = 2 / 2.54 # Convert 2mm to cm
))
width <- panelSize(1.7)*mmToInch
height <- panelSize(1.3)*mmToInch
fileName <- paste0("rad21perc_enh")
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()
data <- fread(here(resultDir, "chromHMM", "A485_pe-pe_anchors", "overlap_enrich_100_state.txt"))
colnames(data) <- c("state", "genome", "pe-pe_A485_down", "pe-pe_A485_no", "pe-pe_A485_up")
data <- data %>% dplyr::select(-genome) %>% as.matrix()
column_to_rownames(var = "state") %>%
as.matrix()
library(circlize)
col_fun <- colorRamp2(c(0, 1, 10),
c("blue", "white", "red"))
library(circlize)
col_fun <- colorRamp2(c(min(data), max(data)), c("white", "red"))
#fviz_nbclust(data, kmeans, method = "wss")
p <- Heatmap(
data,
name = "Odds Ratio", # Name of the heatmap legend
cluster_columns = FALSE, # Remove column dendrogram
row_km = 10, # Define the number of k-means clusters for rows (adjust as needed)
show_row_dend = FALSE,
col = col_fun,
border = TRUE
)
fileName <- paste0("anchorLOLA_dTAG_vs_DMSO_diff0.2_allLoops_extreme_regAnchorBackground_atac")
height <- 7
width <- 3.5
png(here(figDir, paste0(fileName, ".png")), res = 600, unit = "in", height = height, width = width)
print(p)
dev.off()
svglite(here(figDir, paste0(fileName, ".svg")), height = height, width = width)
print(p)
dev.off()